From d4351428f61401a13a105220bfb73eb2e488c293 Mon Sep 17 00:00:00 2001 From: juice094 Date: Thu, 4 Jun 2026 17:39:03 +0800 Subject: [PATCH 01/11] Add FTS5 skill search and pluggable external skill sources Phase 1 (schema v35): FTS5 virtual table over skills with BM25 ranking - skills_fts with triggers for name/description/tags/category indexing - search_skills_text() now uses FTS5-first with LIKE fallback - BM25 weights: name=1.0, desc=0.8, tags=0.4, category=0.2 Phase 2 (schema v36): Pluggable external skill sources - SkillSource trait with GitHubSource and LocalFileSource - sync_sources and sync_log tables for audit trail - devkit_skill_sync MCP tool (Beta tier) - devbase skill import CLI subcommand 70 MCP tools (was 69). All 494 tests pass. Co-Authored-By: Claude Opus 4.7 --- .claude/CLAUDE.md | 4 +- Cargo.lock | 1 + Cargo.toml | 1 + server.json | 5 +- src/commands/skill.rs | 127 +++++++++ src/main.rs | 14 + src/mcp/mod.rs | 6 + src/mcp/tests.rs | 3 +- src/mcp/tools/mod.rs | 2 + src/mcp/tools/skill_sync.rs | 157 +++++++++++ src/registry/migrate.rs | 2 +- src/registry/migrations/mod.rs | 8 + src/registry/migrations/v35_skill_fts.rs | 35 +++ src/registry/migrations/v36_skill_sources.rs | 29 ++ src/registry/test_helpers.rs | 48 ++++ src/skill_runtime/mod.rs | 1 + src/skill_runtime/registry.rs | 77 +++++- src/skill_runtime/sources.rs | 277 +++++++++++++++++++ 18 files changed, 790 insertions(+), 7 deletions(-) create mode 100644 src/mcp/tools/skill_sync.rs create mode 100644 src/registry/migrations/v35_skill_fts.rs create mode 100644 src/registry/migrations/v36_skill_sources.rs create mode 100644 src/skill_runtime/sources.rs diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 7c1dac1..06586b4 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -15,8 +15,8 @@ | F-002 | Edition | `Cargo.toml` | **Rust 2024** | | F-003 | Test Coverage | CI | **494 passed, 0 failed, 5 ignored** | | F-004 | Production Unwrap | Architecture Invariants | **0** (G5 rule enforced) 
| -| F-005 | MCP Tools | `src/mcp/mod.rs` | **69** (5 Stable / 60 Beta / 4 Experimental) | -| F-006 | Schema Version | `registry/migrate.rs` | **v34** | +| F-005 | MCP Tools | `src/mcp/mod.rs` | **70** (5 Stable / 61 Beta / 4 Experimental) | +| F-006 | Schema Version | `registry/migrate.rs` | **v36** | | F-007 | Entities Table | Schema v21+ | **唯一真相源** (`repos` 表已删除) | | F-008 | SQLite Mode | `storage.rs` | **WAL mode** | | F-009 | Clippy | CI | **`-D warnings` 全绿** | diff --git a/Cargo.lock b/Cargo.lock index b2696e1..a808cfb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1440,6 +1440,7 @@ version = "0.20.1" dependencies = [ "anyhow", "assert_cmd", + "async-trait", "blake3", "chrono", "clap", diff --git a/Cargo.toml b/Cargo.toml index 2b89a7e..2250f6e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,7 @@ tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } walkdir = "2" anyhow = "1" +async-trait = "0.1" serde_json = "1" reqwest = { version = "0.12", features = ["json", "blocking"] } tantivy = "0.26" diff --git a/server.json b/server.json index b297927..05d8349 100644 --- a/server.json +++ b/server.json @@ -2,7 +2,7 @@ "$schema": "https://registry.modelcontextprotocol.io/schema/server.json", "name": "io.github.juice094.devbase", "version": "0.20.1", - "description": "Developer Knowledge OS — manage Git repos, vault notes (Markdown), and assets. AI-native workspace with 69 MCP tools.", + "description": "Developer Knowledge OS — manage Git repos, vault notes (Markdown), and assets. 
AI-native workspace with 70 MCP tools.", "license": "AGPL-3.0-or-later", "homepage": "https://github.com/juice094/devbase", "repository": { @@ -77,6 +77,7 @@ "devkit_skill_search", "devkit_skill_run", "devkit_skill_discover", + "devkit_skill_sync", "devkit_known_limit_store", "devkit_known_limit_list", "devkit_relation_store", @@ -107,7 +108,7 @@ } }, "tools": { - "count": 69, + "count": 70, "list": [ "devkit_scan", "devkit_health", diff --git a/src/commands/skill.rs b/src/commands/skill.rs index e6dac91..ae83f94 100644 --- a/src/commands/skill.rs +++ b/src/commands/skill.rs @@ -327,6 +327,103 @@ pub fn run_skill( println!(" [{}] {} (v{}) — rating: {:.2}", s.id, s.name, s.version, s.rating); } } + crate::SkillCommands::Import { + source, + source_path, + dry_run, + json, + } => { + use crate::skill_runtime::sources::{GitHubSource, LocalFileSource, SkillSource}; + let source_impl: Box = + if source.starts_with("https://github.com/") + || source.starts_with("http://github.com/") + || (source.contains('/') + && !source.starts_with('/') + && !source.contains("://")) + { + let (owner, repo) = parse_github_url(&source)?; + let path = source_path.as_deref().unwrap_or("skills"); + Box::new(GitHubSource::new(&owner, &repo, path)) + } else { + let path = source.strip_prefix("file://").unwrap_or(&source); + let name = source_path.as_deref().unwrap_or(path); + Box::new(LocalFileSource::new(name, std::path::Path::new(path))) + }; + let skills = tokio::runtime::Runtime::new() + .unwrap() + .block_on(source_impl.fetch())?; + if dry_run { + if json { + let names: Vec<&str> = skills.iter().map(|s| s.name.as_str()).collect(); + println!( + "{}", + serde_json::to_string_pretty(&serde_json::json!({ + "dry_run": true, + "source": source_impl.name(), + "skills_found": skills.len(), + "skill_names": names, + }))? 
+ ); + } else { + println!( + "Dry-run: found {} skill(s) from '{}':", + skills.len(), + source_impl.name() + ); + for s in &skills { + println!(" - {} ({})", s.name, s.id); + } + } + } else { + let mut added = 0usize; + let mut updated = 0usize; + for skill in &skills { + let exists = registry::get_skill(&conn, &skill.id)?.is_some(); + registry::install_skill(&conn, skill)?; + if exists { + updated += 1; + } else { + added += 1; + } + } + // Record sync audit + let _ = conn.execute( + "INSERT INTO sync_log (source_name, status, skills_added, skills_updated, finished_at) + VALUES (?1, ?2, ?3, ?4, datetime('now'))", + rusqlite::params![ + source_impl.name(), + "success", + added as i64, + updated as i64 + ], + ); + let _ = conn.execute( + "INSERT INTO sync_sources (name, url, source_type) + VALUES (?1, ?2, ?3) + ON CONFLICT(name) DO UPDATE SET last_sync_at = datetime('now')", + rusqlite::params![source_impl.name(), &source, source_impl.name()], + ); + if json { + println!( + "{}", + serde_json::to_string_pretty(&serde_json::json!({ + "source": source_impl.name(), + "skills_found": skills.len(), + "skills_added": added, + "skills_updated": updated, + }))? 
+ ); + } else { + println!( + "Imported {} skill(s) from '{}' ({} added, {} updated).", + skills.len(), + source_impl.name(), + added, + updated + ); + } + } + } crate::SkillCommands::Publish { path, dry_run } => { let p = std::path::PathBuf::from(&path); match skill_runtime::publish::validate_skill_for_publish(&p) { @@ -461,3 +558,33 @@ mod tests { assert!(result.is_ok()); } } + +fn parse_github_url(url: &str) -> anyhow::Result<(String, String)> { + let url = url.trim_end_matches(".git"); + if let Some(rest) = url.strip_prefix("https://github.com/") { + let parts: Vec<&str> = rest.split('/').collect(); + if parts.len() >= 2 { + return Ok((parts[0].to_string(), parts[1].to_string())); + } + } + if let Some(rest) = url.strip_prefix("http://github.com/") { + let parts: Vec<&str> = rest.split('/').collect(); + if parts.len() >= 2 { + return Ok((parts[0].to_string(), parts[1].to_string())); + } + } + if let Some((owner, repo)) = url.split_once('/') { + if !owner.is_empty() + && !repo.is_empty() + && !owner.contains("://") + && !owner.contains('\\') + && !owner.contains(' ') + { + return Ok((owner.to_string(), repo.to_string())); + } + } + Err(anyhow::anyhow!( + "Could not parse GitHub URL: {}. 
Expected format: owner/repo or https://github.com/owner/repo", + url + )) +} diff --git a/src/main.rs b/src/main.rs index e28e4e5..c4c06bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -453,6 +453,20 @@ pub(crate) enum SkillCommands { #[arg(long, default_value_t = 5)] limit: usize, }, + /// Import skills from external sources (GitHub repos or local directories) + Import { + /// Source URL or path (GitHub URL or local directory) + source: String, + /// Path within the source to scan for SKILL.md files + #[arg(long)] + source_path: Option, + /// Dry-run: list discovered skills without installing + #[arg(long)] + dry_run: bool, + /// Output as JSON + #[arg(long)] + json: bool, + }, } #[derive(Subcommand)] diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index 45a83a0..f352d29 100644 --- a/src/mcp/mod.rs +++ b/src/mcp/mod.rs @@ -101,6 +101,7 @@ pub enum McpToolEnum { SkillSearch(DevkitSkillSearchTool), SkillRun(DevkitSkillRunTool), SkillDiscover(DevkitSkillDiscoverTool), + SkillSync(DevkitSkillSyncTool), KnownLimitStore(DevkitKnownLimitStoreTool), KnownLimitList(DevkitKnownLimitListTool), RelationStore(DevkitRelationStoreTool), @@ -198,6 +199,7 @@ impl McpToolEnum { McpToolEnum::SkillSearch(_) => ToolTier::Beta, McpToolEnum::SkillRun(_) => ToolTier::Beta, McpToolEnum::SkillDiscover(_) => ToolTier::Beta, + McpToolEnum::SkillSync(_) => ToolTier::Beta, McpToolEnum::KnownLimitStore(_) => ToolTier::Beta, McpToolEnum::KnownLimitList(_) => ToolTier::Beta, McpToolEnum::RelationStore(_) => ToolTier::Beta, @@ -274,6 +276,7 @@ impl McpTool for McpToolEnum { McpToolEnum::SkillSearch(t) => t.name(), McpToolEnum::SkillRun(t) => t.name(), McpToolEnum::SkillDiscover(t) => t.name(), + McpToolEnum::SkillSync(t) => t.name(), McpToolEnum::KnownLimitStore(t) => t.name(), McpToolEnum::KnownLimitList(t) => t.name(), McpToolEnum::RelationStore(t) => t.name(), @@ -348,6 +351,7 @@ impl McpTool for McpToolEnum { McpToolEnum::SkillSearch(t) => t.schema(), McpToolEnum::SkillRun(t) => t.schema(), 
McpToolEnum::SkillDiscover(t) => t.schema(), + McpToolEnum::SkillSync(t) => t.schema(), McpToolEnum::KnownLimitStore(t) => t.schema(), McpToolEnum::KnownLimitList(t) => t.schema(), McpToolEnum::RelationStore(t) => t.schema(), @@ -426,6 +430,7 @@ impl McpTool for McpToolEnum { McpToolEnum::SkillSearch(t) => t.invoke(args, ctx).await, McpToolEnum::SkillRun(t) => t.invoke(args, ctx).await, McpToolEnum::SkillDiscover(t) => t.invoke(args, ctx).await, + McpToolEnum::SkillSync(t) => t.invoke(args, ctx).await, McpToolEnum::KnownLimitStore(t) => t.invoke(args, ctx).await, McpToolEnum::KnownLimitList(t) => t.invoke(args, ctx).await, McpToolEnum::RelationStore(t) => t.invoke(args, ctx).await, @@ -770,6 +775,7 @@ pub fn build_server_with_tiers(tiers: Option<&HashSet>) -> McpServer { McpToolEnum::SkillSearch(DevkitSkillSearchTool), McpToolEnum::SkillRun(DevkitSkillRunTool), McpToolEnum::SkillDiscover(DevkitSkillDiscoverTool), + McpToolEnum::SkillSync(DevkitSkillSyncTool), McpToolEnum::KnownLimitStore(DevkitKnownLimitStoreTool), McpToolEnum::KnownLimitList(DevkitKnownLimitListTool), McpToolEnum::RelationStore(DevkitRelationStoreTool), diff --git a/src/mcp/tests.rs b/src/mcp/tests.rs index 653463b..f413240 100644 --- a/src/mcp/tests.rs +++ b/src/mcp/tests.rs @@ -50,7 +50,7 @@ async fn test_tools_list() { let (mut ctx, _tmp) = test_ctx(); let resp = server.handle_request(req, &mut ctx).await.unwrap(); let tools = resp.get("result").unwrap().get("tools").unwrap().as_array().unwrap(); - assert_eq!(tools.len(), 69); + assert_eq!(tools.len(), 70); let names: Vec<&str> = tools.iter().map(|t| t.get("name").unwrap().as_str().unwrap()).collect(); assert!(names.contains(&"devkit_index_health")); assert!(names.contains(&"devkit_vault_export")); @@ -105,6 +105,7 @@ async fn test_tools_list() { assert!(names.contains(&"devkit_skill_search")); assert!(names.contains(&"devkit_skill_run")); assert!(names.contains(&"devkit_skill_discover")); + assert!(names.contains(&"devkit_skill_sync")); 
assert!(names.contains(&"devkit_known_limit_store")); assert!(names.contains(&"devkit_known_limit_list")); assert!(names.contains(&"devkit_relation_store")); diff --git a/src/mcp/tools/mod.rs b/src/mcp/tools/mod.rs index cea8769..492f24b 100644 --- a/src/mcp/tools/mod.rs +++ b/src/mcp/tools/mod.rs @@ -13,6 +13,7 @@ pub mod relations; pub mod repo; pub mod session; pub mod skill; +pub mod skill_sync; pub mod status; pub mod vault; pub mod workflow; @@ -34,6 +35,7 @@ pub use relations::*; pub use repo::*; pub use session::*; pub use skill::*; +pub use skill_sync::*; pub use status::*; pub use vault::*; pub use workflow::*; diff --git a/src/mcp/tools/skill_sync.rs b/src/mcp/tools/skill_sync.rs new file mode 100644 index 0000000..3f3d5c8 --- /dev/null +++ b/src/mcp/tools/skill_sync.rs @@ -0,0 +1,157 @@ +use anyhow::Context; + +use crate::mcp::McpTool; +use crate::skill_runtime::sources::{GitHubSource, LocalFileSource, SkillSource}; +use crate::skill_runtime::registry; + +#[derive(Clone)] +pub struct DevkitSkillSyncTool; + +impl McpTool for DevkitSkillSyncTool { + fn name(&self) -> &'static str { + "devkit_skill_sync" + } + + fn schema(&self) -> serde_json::Value { + serde_json::json!({ + "description": r#"Sync skills from external sources (GitHub repositories or local directories) into the devbase skill registry. +Each discovered SKILL.md file is parsed and installed. Sources are recorded and can be re-synced incrementally. + +Source URL formats: +- GitHub: https://github.com/owner/repo/tree/branch/path or owner/repo +- Local: file:///absolute/path or /absolute/path + +Requires DEVBASE_MCP_ENABLE_DESTRUCTIVE=1 since this modifies the skill registry."#, + "inputSchema": { + "type": "object", + "properties": { + "source": { + "type": "string", + "description": "Source URL or path to sync skills from. GitHub repo URL or local directory path." + }, + "source_path": { + "type": "string", + "description": "Path within the source to scan (default: '.' 
for local, 'skills' for GitHub)." + }, + "dry_run": { + "type": "boolean", + "description": "If true, list discovered skills without installing them.", + "default": false + } + }, + "required": ["source"] + } + }) + } + + async fn invoke( + &self, + args: serde_json::Value, + ctx: &mut crate::storage::AppContext, + ) -> anyhow::Result { + crate::mcp::check_destructive_enabled()?; + + let source_url = args + .get("source") + .and_then(|v| v.as_str()) + .context("Missing required argument: source")?; + let source_path = args.get("source_path").and_then(|v| v.as_str()); + let dry_run = args + .get("dry_run") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + let source: Box = if source_url.starts_with("https://github.com/") + || source_url.starts_with("http://github.com/") + || (source_url.contains('/') && !source_url.starts_with("/") && !source_url.contains("://")) + { + let (owner, repo) = parse_github_url(source_url)?; + let path = source_path.unwrap_or("skills"); + Box::new(GitHubSource::new(&owner, &repo, path)) + } else { + let path = source_url.strip_prefix("file://").unwrap_or(source_url); + let name = source_path.unwrap_or(path); + Box::new(LocalFileSource::new(name, std::path::Path::new(path))) + }; + + let skills = source.fetch().await?; + let count = skills.len(); + + if dry_run { + let names: Vec = skills.iter().map(|s| s.name.clone()).collect(); + return Ok(serde_json::json!({ + "dry_run": true, + "source": source.name(), + "skills_found": count, + "skill_names": names + })); + } + + let conn = ctx.conn_mut()?; + let mut added = 0usize; + let mut updated = 0usize; + + for skill in &skills { + let exists = registry::get_skill(&conn, &skill.id)?.is_some(); + registry::install_skill(&conn, skill)?; + if exists { + updated += 1; + } else { + added += 1; + } + } + + // Record sync in audit log + let _ = conn.execute( + "INSERT INTO sync_log (source_name, status, skills_added, skills_updated, finished_at) + VALUES (?1, ?2, ?3, ?4, datetime('now'))", + 
rusqlite::params![source.name(), "success", added as i64, updated as i64], + ); + + // Update last_sync_at on the source record + let _ = conn.execute( + "INSERT INTO sync_sources (name, url, source_type) + VALUES (?1, ?2, ?3) + ON CONFLICT(name) DO UPDATE SET last_sync_at = datetime('now')", + rusqlite::params![source.name(), source_url, source.name()], + ); + + Ok(serde_json::json!({ + "source": source.name(), + "skills_found": count, + "skills_added": added, + "skills_updated": updated, + "dry_run": false + })) + } +} + +fn parse_github_url(url: &str) -> anyhow::Result<(String, String)> { + let url = url.trim_end_matches(".git"); + if let Some(rest) = url.strip_prefix("https://github.com/") { + let parts: Vec<&str> = rest.split('/').collect(); + if parts.len() >= 2 { + return Ok((parts[0].to_string(), parts[1].to_string())); + } + } + if let Some(rest) = url.strip_prefix("http://github.com/") { + let parts: Vec<&str> = rest.split('/').collect(); + if parts.len() >= 2 { + return Ok((parts[0].to_string(), parts[1].to_string())); + } + } + // Bare owner/repo format + if let Some((owner, repo)) = url.split_once('/') { + if !owner.is_empty() && !repo.is_empty() + && !owner.contains("://") + && !owner.contains('\\') + && !owner.contains(' ') + { + return Ok((owner.to_string(), repo.to_string())); + } + } + Err(anyhow::anyhow!( + "Could not parse GitHub URL: {}. 
Expected format: owner/repo or https://github.com/owner/repo", + url + )) +} diff --git a/src/registry/migrate.rs b/src/registry/migrate.rs index 913ecf6..4e926aa 100644 --- a/src/registry/migrate.rs +++ b/src/registry/migrate.rs @@ -4,7 +4,7 @@ use super::*; use crate::storage::StorageBackend; use std::path::PathBuf; -pub const CURRENT_SCHEMA_VERSION: i32 = 34; +pub const CURRENT_SCHEMA_VERSION: i32 = 36; impl WorkspaceRegistry { pub fn db_path() -> anyhow::Result { diff --git a/src/registry/migrations/mod.rs b/src/registry/migrations/mod.rs index 01de61d..ca97093 100644 --- a/src/registry/migrations/mod.rs +++ b/src/registry/migrations/mod.rs @@ -36,6 +36,8 @@ pub mod v31_agent_contexts; pub mod v32_context_links; pub mod v33_workflow_context; pub mod v34_memory_vectors; +pub mod v35_skill_fts; +pub mod v36_skill_sources; pub fn run_all(conn: &mut Connection) -> anyhow::Result<()> { let user_version: i32 = conn.query_row("PRAGMA user_version", [], |row| row.get(0))?; @@ -142,6 +144,12 @@ pub fn run_all(conn: &mut Connection) -> anyhow::Result<()> { if user_version < 34 { v34_memory_vectors::run(conn)?; } + if user_version < 35 { + v35_skill_fts::run(conn)?; + } + if user_version < 36 { + v36_skill_sources::run(conn)?; + } Ok(()) } diff --git a/src/registry/migrations/v35_skill_fts.rs b/src/registry/migrations/v35_skill_fts.rs new file mode 100644 index 0000000..5be6369 --- /dev/null +++ b/src/registry/migrations/v35_skill_fts.rs @@ -0,0 +1,35 @@ +use rusqlite::Connection; + +pub fn run(conn: &Connection) -> anyhow::Result<()> { + conn.execute_batch( + "CREATE VIRTUAL TABLE IF NOT EXISTS skills_fts USING fts5( + name, + description, + tags, + category, + content='skills', + content_rowid='rowid', + tokenize='unicode61' + ); + + CREATE TRIGGER IF NOT EXISTS skills_fts_ai AFTER INSERT ON skills BEGIN + INSERT INTO skills_fts(rowid, name, description, tags, category) + VALUES (new.rowid, new.name, new.description, new.tags, new.category); + END; + + CREATE TRIGGER IF 
NOT EXISTS skills_fts_ad AFTER DELETE ON skills BEGIN + INSERT INTO skills_fts(skills_fts, rowid, name, description, tags, category) + VALUES ('delete', old.rowid, old.name, old.description, old.tags, old.category); + END; + + CREATE TRIGGER IF NOT EXISTS skills_fts_au AFTER UPDATE ON skills BEGIN + INSERT INTO skills_fts(skills_fts, rowid, name, description, tags, category) + VALUES ('delete', old.rowid, old.name, old.description, old.tags, old.category); + INSERT INTO skills_fts(rowid, name, description, tags, category) + VALUES (new.rowid, new.name, new.description, new.tags, new.category); + END;", + )?; + + conn.execute("PRAGMA user_version = 35", [])?; + Ok(()) +} diff --git a/src/registry/migrations/v36_skill_sources.rs b/src/registry/migrations/v36_skill_sources.rs new file mode 100644 index 0000000..71f5e38 --- /dev/null +++ b/src/registry/migrations/v36_skill_sources.rs @@ -0,0 +1,29 @@ +use rusqlite::Connection; + +pub fn run(conn: &Connection) -> anyhow::Result<()> { + conn.execute_batch( + "CREATE TABLE IF NOT EXISTS sync_sources ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE, + url TEXT NOT NULL, + source_type TEXT NOT NULL DEFAULT 'github', + enabled INTEGER NOT NULL DEFAULT 1, + last_sync_at TEXT, + created_at TEXT NOT NULL DEFAULT (datetime('now')) + ); + + CREATE TABLE IF NOT EXISTS sync_log ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_name TEXT NOT NULL, + status TEXT NOT NULL, + skills_added INTEGER NOT NULL DEFAULT 0, + skills_updated INTEGER NOT NULL DEFAULT 0, + error_message TEXT, + started_at TEXT NOT NULL DEFAULT (datetime('now')), + finished_at TEXT + );", + )?; + + conn.execute("PRAGMA user_version = 36", [])?; + Ok(()) +} diff --git a/src/registry/test_helpers.rs b/src/registry/test_helpers.rs index 13385d7..f990516 100644 --- a/src/registry/test_helpers.rs +++ b/src/registry/test_helpers.rs @@ -195,6 +195,33 @@ CREATE TABLE IF NOT EXISTS skills ( ); CREATE INDEX IF NOT EXISTS idx_skills_type ON 
skills(skill_type); +CREATE VIRTUAL TABLE IF NOT EXISTS skills_fts USING fts5( + name, + description, + tags, + category, + content='skills', + content_rowid='rowid', + tokenize='unicode61' +); + +CREATE TRIGGER IF NOT EXISTS skills_fts_ai AFTER INSERT ON skills BEGIN + INSERT INTO skills_fts(rowid, name, description, tags, category) + VALUES (new.rowid, new.name, new.description, new.tags, new.category); +END; + +CREATE TRIGGER IF NOT EXISTS skills_fts_ad AFTER DELETE ON skills BEGIN + INSERT INTO skills_fts(skills_fts, rowid, name, description, tags, category) + VALUES ('delete', old.rowid, old.name, old.description, old.tags, old.category); +END; + +CREATE TRIGGER IF NOT EXISTS skills_fts_au AFTER UPDATE ON skills BEGIN + INSERT INTO skills_fts(skills_fts, rowid, name, description, tags, category) + VALUES ('delete', old.rowid, old.name, old.description, old.tags, old.category); + INSERT INTO skills_fts(rowid, name, description, tags, category) + VALUES (new.rowid, new.name, new.description, new.tags, new.category); +END; + CREATE TABLE IF NOT EXISTS skill_executions ( id INTEGER PRIMARY KEY AUTOINCREMENT, skill_id TEXT NOT NULL REFERENCES skills(id) ON DELETE CASCADE, @@ -208,6 +235,27 @@ CREATE TABLE IF NOT EXISTS skill_executions ( duration_ms INTEGER ); +CREATE TABLE IF NOT EXISTS sync_sources ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE, + url TEXT NOT NULL, + source_type TEXT NOT NULL DEFAULT 'github', + enabled INTEGER NOT NULL DEFAULT 1, + last_sync_at TEXT, + created_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE TABLE IF NOT EXISTS sync_log ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_name TEXT NOT NULL, + status TEXT NOT NULL, + skills_added INTEGER NOT NULL DEFAULT 0, + skills_updated INTEGER NOT NULL DEFAULT 0, + error_message TEXT, + started_at TEXT NOT NULL DEFAULT (datetime('now')), + finished_at TEXT +); + -- v16: Unified Entity Model CREATE TABLE IF NOT EXISTS entity_types ( name TEXT PRIMARY KEY, diff 
--git a/src/skill_runtime/mod.rs b/src/skill_runtime/mod.rs index 31f3445..442808c 100644 --- a/src/skill_runtime/mod.rs +++ b/src/skill_runtime/mod.rs @@ -8,6 +8,7 @@ pub mod parser; pub mod publish; pub mod registry; pub mod scoring; +pub mod sources; // Types migrated to devbase-skill-runtime-types crate. pub use devbase_skill_runtime_types::*; diff --git a/src/skill_runtime/registry.rs b/src/skill_runtime/registry.rs index cf76517..4216bcf 100644 --- a/src/skill_runtime/registry.rs +++ b/src/skill_runtime/registry.rs @@ -208,12 +208,67 @@ pub fn list_skills( rows.collect::, _>>().map_err(Into::into) } -/// Full-text search on skill name and description. +/// Full-text search on skill name, description, tags, and category. +/// Uses FTS5 with BM25 ranking when the FTS index is available, +/// falling back to SQLite LIKE for cases where FTS5 returns no results. pub fn search_skills_text( conn: &Connection, query: &str, limit: usize, category: Option<&str>, +) -> anyhow::Result> { + // Try FTS5 first with BM25 ranking + if let Ok(results) = search_skills_fts5(conn, query, limit, category) { + if !results.is_empty() { + return Ok(results); + } + } + // Fallback to LIKE for empty results or FTS5 errors + search_skills_like(conn, query, limit, category) +} + +fn search_skills_fts5( + conn: &Connection, + query: &str, + limit: usize, + category: Option<&str>, +) -> anyhow::Result> { + let fts_query = build_fts5_query(query); + + let sql = if category.is_some() { + "SELECT s.id, s.name, s.version, s.description, s.author, s.tags, s.entry_script, + s.skill_type, s.local_path, s.installed_at, s.updated_at, s.last_used_at, + s.dependencies, s.category + FROM skills_fts f + JOIN skills s ON s.rowid = f.rowid + WHERE skills_fts MATCH ?1 AND s.category = ?2 + ORDER BY bm25(skills_fts, 1.0, 0.8, 0.4, 0.2) ASC + LIMIT ?3" + } else { + "SELECT s.id, s.name, s.version, s.description, s.author, s.tags, s.entry_script, + s.skill_type, s.local_path, s.installed_at, s.updated_at, 
s.last_used_at, + s.dependencies, s.category + FROM skills_fts f + JOIN skills s ON s.rowid = f.rowid + WHERE skills_fts MATCH ?1 + ORDER BY bm25(skills_fts, 1.0, 0.8, 0.4, 0.2) ASC + LIMIT ?2" + }; + + let mut stmt = conn.prepare(sql)?; + let rows = if let Some(cat) = category { + stmt.query_map(params![&fts_query, cat, limit as i64], skill_row_from_sql)? + } else { + stmt.query_map(params![&fts_query, limit as i64], skill_row_from_sql)? + }; + rows.collect::, _>>().map_err(Into::into) +} + +fn search_skills_like( + conn: &Connection, + query: &str, + limit: usize, + category: Option<&str>, ) -> anyhow::Result> { let pattern = format!("%{}%", query.replace('%', "\\%").replace('_', "\\_")); let sql = if category.is_some() { @@ -240,6 +295,26 @@ pub fn search_skills_text( rows.collect::, _>>().map_err(Into::into) } +/// Build a safe FTS5 query string from user input. +/// Multi-word queries are AND'ed; empty input matches all. +fn build_fts5_query(text: &str) -> String { + let trimmed = text.trim(); + if trimmed.is_empty() { + return "*".to_string(); + } + let terms: Vec<&str> = trimmed.split_whitespace().collect(); + if terms.len() == 1 { + // Escape double-quotes within a term to avoid FTS5 syntax errors + format!("\"{}\"", terms[0].replace('"', "\"\"")) + } else { + terms + .iter() + .map(|t| format!("\"{}\"", t.replace('"', "\"\""))) + .collect::>() + .join(" AND ") + } +} + /// Semantic search on skill descriptions using cosine similarity. pub fn search_skills_semantic( conn: &Connection, diff --git a/src/skill_runtime/sources.rs b/src/skill_runtime/sources.rs new file mode 100644 index 0000000..72d1390 --- /dev/null +++ b/src/skill_runtime/sources.rs @@ -0,0 +1,277 @@ +use std::path::Path; + +use async_trait::async_trait; +use chrono::Utc; + +use super::{SkillMeta, SkillType}; + +/// Trait for pluggable external skill sources. +/// Each implementation knows how to fetch skills from a specific origin +/// (GitHub repo, HTTP endpoint, local directory, etc.). 
+#[async_trait] +pub trait SkillSource: Send + Sync { + /// Human-readable name for this source (e.g., "github:anthropics/skills"). + fn name(&self) -> &str; + + /// Fetch skills from this source. Called by the sync pipeline. + async fn fetch(&self) -> anyhow::Result>; +} + +// ── GitHub Source ────────────────────────────────────────────────── + +pub struct GitHubSource { + pub owner: String, + pub repo: String, + /// Path within the repo to scan for SKILL.md files (e.g., "skills"). + pub path: String, + client: reqwest::Client, +} + +impl GitHubSource { + pub fn new(owner: &str, repo: &str, path: &str) -> Self { + GitHubSource { + owner: owner.to_string(), + repo: repo.to_string(), + path: path.to_string(), + client: reqwest::Client::new(), + } + } +} + +#[async_trait] +impl SkillSource for GitHubSource { + fn name(&self) -> &str { + "github" + } + + async fn fetch(&self) -> anyhow::Result> { + let url = format!( + "https://api.github.com/repos/{}/{}/contents/{}", + self.owner, self.repo, self.path + ); + + let config = crate::config::Config::load()?; + let mut req = self + .client + .get(&url) + .header("User-Agent", "devbase-skill-sync") + .header("Accept", "application/vnd.github.v3+json"); + + if let Some(token) = config.github.token.as_deref() { + req = req.header("Authorization", format!("Bearer {}", token)); + } + + let resp = req.send().await?; + if !resp.status().is_success() { + return Err(anyhow::anyhow!( + "GitHub API returned {} for {}/{}", + resp.status(), + self.owner, + self.repo + )); + } + + let entries: Vec = resp.json().await?; + let mut skills = Vec::new(); + + for entry in entries { + let entry_type = entry["type"].as_str().unwrap_or(""); + let name = entry["name"].as_str().unwrap_or("unknown"); + + // Handle directories by recursing (Box::pin required for recursive async fn) + if entry_type == "dir" { + let dir_source = GitHubSource::new( + &self.owner, + &self.repo, + &format!("{}/{}", self.path, name), + ); + let fut = 
Box::pin(dir_source.fetch()); + if let Ok(dir_skills) = fut.await { + skills.extend(dir_skills); + } + continue; + } + + if !name.ends_with(".md") { + continue; + } + + let download_url = entry["download_url"].as_str().unwrap_or(""); + let html_url = entry["html_url"].as_str().unwrap_or(""); + if download_url.is_empty() { + continue; + } + + let content = match self + .client + .get(download_url) + .header("User-Agent", "devbase-skill-sync") + .send() + .await + { + Ok(resp) => resp.text().await.unwrap_or_default(), + Err(_) => continue, + }; + + let skill_id = name.trim_end_matches(".md").to_lowercase().replace('_', "-"); + + // Try parsing as SKILL.md first; fall back to plain markdown extraction + let skill_meta = if content.contains("---") { + parse_skill_or_extract(&content, &skill_id, html_url) + } else { + extract_skill_from_md(&content, &skill_id, html_url) + }; + + skills.push(skill_meta); + } + + Ok(skills) + } +} + +// ── Local File Source ────────────────────────────────────────────── + +pub struct LocalFileSource { + pub name_str: String, + pub dir_path: std::path::PathBuf, +} + +impl LocalFileSource { + pub fn new(name: &str, dir_path: &Path) -> Self { + LocalFileSource { + name_str: name.to_string(), + dir_path: dir_path.to_path_buf(), + } + } +} + +#[async_trait] +impl SkillSource for LocalFileSource { + fn name(&self) -> &str { + &self.name_str + } + + async fn fetch(&self) -> anyhow::Result> { + let mut skills = Vec::new(); + scan_dir_for_skills(&self.dir_path, &mut skills)?; + Ok(skills) + } +} + +fn scan_dir_for_skills(dir: &Path, skills: &mut Vec) -> anyhow::Result<()> { + if !dir.is_dir() { + return Ok(()); + } + for entry in std::fs::read_dir(dir)? 
{ + let entry = entry?; + let path = entry.path(); + if path.is_dir() { + scan_dir_for_skills(&path, skills)?; + } else if path.extension().map_or(false, |e| e == "md") { + let content = std::fs::read_to_string(&path)?; + let name = path + .file_stem() + .unwrap_or_default() + .to_string_lossy() + .to_string(); + let skill_id = name.to_lowercase().replace('_', "-"); + let skill_meta = if content.contains("---") { + parse_skill_or_extract(&content, &skill_id, "") + } else { + extract_skill_from_md(&content, &skill_id, "") + }; + skills.push(skill_meta); + } + } + Ok(()) +} + +// ── Helpers ──────────────────────────────────────────────────────── + +fn parse_skill_or_extract(content: &str, skill_id: &str, source_url: &str) -> SkillMeta { + // Try the proper SKILL.md parser first + let skill_dir = std::env::temp_dir().join(format!("devbase-skill-import-{}", skill_id)); + std::fs::create_dir_all(&skill_dir).ok(); + let skill_md = skill_dir.join("SKILL.md"); + std::fs::write(&skill_md, content).ok(); + + let mut skill = crate::skill_runtime::parser::parse_skill_md(&skill_md) + .unwrap_or_else(|_| extract_skill_from_md(content, skill_id, source_url)); + + // Override with our computed id + let old_id = skill.id.clone(); + skill.id = skill_id.to_string(); + skill.local_path = skill_dir; + skill.skill_type = SkillType::Custom; + + // If the parser didn't pick up the description, try extracting + if skill.description.is_empty() || skill.description == old_id { + skill.description = extract_description(content); + } + + skill +} + +fn extract_skill_from_md(content: &str, skill_id: &str, _source_url: &str) -> SkillMeta { + let description = extract_description(content); + let tags = extract_tags(content); + + SkillMeta { + id: skill_id.to_string(), + name: skill_id.to_string(), + version: "0.1.0".to_string(), + description, + author: None, + tags, + entry_script: None, + category: None, + skill_type: SkillType::Custom, + local_path: std::path::PathBuf::new(), + inputs: 
vec![], + outputs: vec![], + dependencies: vec![], + embedding: None, + installed_at: Utc::now(), + updated_at: Utc::now(), + last_used_at: None, + body: content.to_string(), + } +} + +fn extract_description(content: &str) -> String { + for line in content.lines() { + let trimmed = line.trim(); + if let Some(desc) = trimmed.strip_prefix("description:") { + return desc.trim().trim_matches('"').to_string(); + } + } + content + .lines() + .find(|l| { + let t = l.trim(); + !t.is_empty() && !t.starts_with('#') && !t.starts_with("---") + }) + .unwrap_or("") + .trim() + .to_string() +} + +fn extract_tags(content: &str) -> Vec { + for line in content.lines() { + let trimmed = line.trim(); + if let Some(tags_str) = trimmed.strip_prefix("tags:") { + return tags_str + .split(',') + .map(|t| { + t.trim() + .trim_matches('"') + .trim_matches('[') + .trim_matches(']') + .to_string() + }) + .filter(|t| !t.is_empty()) + .collect(); + } + } + vec![] +} From 28d24e05a67733ff14a24b4c380a1c3b2816230c Mon Sep 17 00:00:00 2001 From: juice094 Date: Thu, 4 Jun 2026 19:05:28 +0800 Subject: [PATCH 02/11] Update docs for v35-v36: FTS5 skill search + pluggable skill sources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - README: 69→70 MCP tools, add FTS5 hybrid search highlight - CHANGELOG: document FTS5 skills_fts (v35), SkillSource trait + sync audit (v36), devkit_skill_sync MCP tool, skill import CLI - AGENTS: Schema 34→36, 69→70 tools, updated test counts Co-Authored-By: Claude Opus 4.7 --- AGENTS.md | 6 +++--- CHANGELOG.md | 3 +++ README.md | 8 ++++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 8e8f4e7..71577d7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -5,7 +5,7 @@ > 它将本地数字资产的原始数据(代码库、笔记、Skill、工作流)编译为 AI 可决策的结构化情境,不负责思考,不负责执行,只负责感知、编码、持久化、检索。 - **当前阶段**:阶段十一 — v0.20.0 已发布(知识完备性) -- **当前版本**:v0.20.1(Schema 34,69 MCP tools,494 tests) +- **当前版本**:v0.20.1(Schema 36,70 MCP tools,494 tests) - 
**已完成里程碑**:Registry God Object 完全拆解(10 子模块提取)+ 18 workspace crates 提取 + MCP Python SDK 1.16.0 兼容修复 + repo.rs trait 化 + flaky 测试根治(RF-2.1/2.2/2.3)+ 许可证迁移 + health 性能优化(-44%)+ index skip-embeddings + batch encoding 实验 + RF-6 清零 + 架构治理文档(ADR/不变量清单)+ Tantivy BM25 代码符号搜索(P1)+ AppContext 职责拆分 Phase 1/2(storage.rs 860→430 行)+ 架构不变量 CI(G5/T11/T12)+ Embedding 多后端(Candle/Ollama 配置切换, P3)+ EnvVersionCache 扩展(9 工具链检测, P4)+ **v0.16.0 Agent Contexts(P1/P2/P3)**:`agent_contexts`/`agent_memories`/`context_entity_links` Schema + 9 个 Session MCP tools + Context-aware Skill Runtime(`DEVBASE_ACTIVE_CONTEXT` 注入)+ **v0.16.1 Workflow-Session Binding**:`workflow_executions.context_id` + 执行自动绑定 Active Context + **v0.17.0 Embedding Externalization**:`embedding` 从 default features 移除(Candle/Ollama 降级为 opt-in `llm-backend`)+ Schema 34 向量存储 + `cosine_similarity` SQLite UDF + `devkit_session_recall` / `devkit_session_index`(60 tools)+ **v0.18.0 ClaudeCode Integration**:`devkit_project_brief`(Markdown 项目简报)+ `devkit_impact_analysis`(修改影响范围分析)+ `devkit_session_export` / `devkit_session_import` + `scripts/devbase-claude.ps1` 启动器(自动注入 `.claude/CLAUDE.md`)+ RFC `docs/RFC/claudecode-workflow-integration.md`(64 tools)+ **v0.18.0 发布收尾**:PR 合并 + 双平台二进制构建 + GitHub Release + 根目录治理 + 世界模型战略认知沉淀(Vault + AGENTS 双向联动)+ NotebookLM 生态消化(5 项目注册)+ GreptimeDB 互补分析 + **v0.19.0 知识基础设施硬化**:SQLite WAL 默认启用 + `devkit_index_health`(Beta)+ Vault 导出(`devkit_vault_export`)+ Redis ADR 决策(放弃引入)+ **v0.20.0 知识完备性**:Vault 双向链接 BFS 图遍历(`devkit_vault_graph` 扩展)+ Vault Git-based 历史追踪(`devkit_vault_history`,第 67 个 tool)+ 混合检索质量监控(`devkit_search_quality`,第 68 个 tool,`HybridSearchMetrics`)+ Block 引用支持(`WikiLink.anchor`:`[[note#heading]]` / `[[note#^block-id]]`)+ 性能回归基线(`#[ignore]` 1k/10k 阈值测试)+ 客户端无关原则(Client-Agnostic Principle)落地 + `skill sync` 泛化接口(零硬编码客户端路径) - **核心方向**:让 Kimi CLI 在调用文件工具之前,先通过 devbase 获得"该读哪些文件、为什么读、它们之间的关系" - **本质分析**:见 `vault/99-Meta/devbase-essence-analysis-20260430.md` 与 `docs/architecture/redefinition.md` @@ 
-23,10 +23,10 @@ Skill Runtime 全生命周期已落地(含依赖管理 Schema v15),Schema - **Workspace**:`%LOCALAPPDATA%\devbase\workspace/` —— 文件系统 = source of truth - `vault/` —— PARA 结构:00-Inbox, 01-Projects, 02-Areas, 03-Resources, 04-Archives, 99-Meta - `assets/` —— 二进制资源 -- **MCP Server**:stdio only,**69 个 tools**(含 7 个 vault tools + 8 个代码分析工具 + 5 个 embedding/搜索工具 + 4 个 Skill Runtime tools + 3 个 Workflow/评分 tools + 1 个报告工具 + 1 个 arXiv 工具 + 2 个 KnownLimit tools + 3 个 Relation tools + 11 个 Agent Context tools + 2 个 ClaudeCode 集成工具 + 1 个 streaming index 工具 + 1 个 oplog 工具 + 1 个 Index Health 工具 + 1 个 Search Quality 工具 + 1 个 Evaluate 工具 + 1 个 DocumentConvert 工具);配置见 `mcp.json` +- **MCP Server**:stdio only,**70 个 tools**(含 7 个 vault tools + 8 个代码分析工具 + 5 个 embedding/搜索工具 + 5 个 Skill Runtime tools + 3 个 Workflow/评分 tools + 1 个报告工具 + 1 个 arXiv 工具 + 2 个 KnownLimit tools + 3 个 Relation tools + 11 个 Agent Context tools + 2 个 ClaudeCode 集成工具 + 1 个 streaming index 工具 + 1 个 oplog 工具 + 1 个 Index Health 工具 + 1 个 Search Quality 工具 + 1 个 Evaluate 工具 + 1 个 DocumentConvert 工具);配置见 `mcp.json` - **Kimi CLI 集成**:MCP server 已通过 `kimi mcp add` 注册,端到端验证通过(`kimi --print` 成功调用 `devkit_health`);项目级 skill 位于 `.kimi/skills/devbase-project/SKILL.md` - **统一节点模型**:`core::node::{Node, NodeType, Edge}` —— GitRepo / VaultNote / Asset / ExternalLink -- **当前测试**:451+ lib passed / 0 failed / 5 ignored + 11/11 integration passed(`tests/cli.rs`) +- **当前测试**:476 lib passed / 0 failed / 5 ignored + 7/7 integration passed + 11/11 workflow passed(共 494) - **编译状态**:0 warning / 0 vulnerabilities(`cargo audit` 干净,除上游 `tokei` 的 `RUSTSEC-2020-0163`) - **Workspace 结构**:`crates/` 目录已启用,19 个零耦合模块已提取为独立 crate(`devbase-symbol-links`, `devbase-sync-protocol`, `devbase-core-types`, `devbase-syncthing-client`, `devbase-vault-frontmatter`, `devbase-vault-wikilink`, `devbase-workflow-interpolate`, `devbase-workflow-model`, `devbase-registry-health`, `devbase-registry-metrics`, `devbase-registry-workspace`, `devbase-embedding`, 
`devbase-skill-runtime-types`, `devbase-skill-runtime-parser`, `devbase-registry-entity`, `devbase-registry-relation`, `devbase-registry-call-graph`, `devbase-registry-dead-code`, `devbase-registry-code-symbols`) - **Workflow Engine**:YAML 解析 + 拓扑调度 + batch 并行执行 + 5 种 step 类型(skill/subworkflow/parallel/condition/loop) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bf9074..c5a395e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- **FTS5 技能全文搜索** (Schema v35) — `skills_fts` 虚拟表 + 触发器,`search_skills_text()` 使用 BM25 排序(name=1.0, desc=0.8, tags=0.4, category=0.2),LIKE 降级 fallback +- **可插拔外部技能源** (Schema v36) — `SkillSource` trait + `GitHubSource` / `LocalFileSource`,`sync_sources` / `sync_log` 审计表,`devkit_skill_sync` MCP 工具(Beta tier),`devbase skill import` CLI 子命令 +- MCP 工具数: 69 → **70**(5 stable + 61 beta + 4 experimental) - `devkit_document_convert` — Experimental tier MCP tool,PDF/PPTX → Markdown 转换(`pdftotext` / `python-pptx` 流水线),含 frontmatter 质量标注 - Stable 工具 invocation 测试补全:`devkit_query_repos`、`devkit_vault_search`、`devkit_vault_read`、`devkit_status`、`devkit_workflow_list`、`devkit_index` - `seed_repo()` 轻量测试 helper(仅插入 `entities` 表,无副作用) diff --git a/README.md b/README.md index 607ea58..d9d0d06 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ devbase 将代码库、笔记与工作流编译为 AI 可推理的结构化情 | 你是谁 | devbase 为你做什么 | |:---|:---| | **人类开发者** | `devbase tui` — 终端仪表盘,一眼看清 N 个仓库的 Git 状态,按 `s` 批量安全同步 | -| **AI 智能体** | 69 个 MCP 工具:通过 `devkit_skill_run` 发现、执行、编排 Skill — 不再重复造轮子 | +| **AI 智能体** | 70 个 MCP 工具:通过 `devkit_skill_run` 发现、执行、编排 Skill — 不再重复造轮子 | | **项目维护者** | `devbase skill discover .` — 一键将项目封装为 Skill,让 AI 用户能够发现和调用 | --- @@ -34,11 +34,11 @@ devbase 将代码库、笔记与工作流编译为 AI 可推理的结构化情 | 亮点 | 说明 | |:---|:---| | 📊 **TUI 仪表盘** | ratatui 终端界面:跨仓库搜索、安全同步、Skill/Workflow 发现 | -| 🔌 **69 个 MCP 工具** | stdio 本地进程通信:仓库管理、代码分析、知识图谱、智能体记忆 | +| 🔌 **70 个 MCP 工具** | stdio 
本地进程通信:仓库管理、代码分析、知识图谱、智能体记忆 | | 🏠 **本地优先** | 零数据离开本机 — SQLite + Tantivy + tree-sitter,无需云端 | -| 🔍 **混合检索** | BM25 全文 + 纯 SQL 向量搜索(`cosine_similarity` UDF),零 ML 运行时依赖 | +| 🔍 **混合检索** | BM25 全文 + FTS5 技能搜索 + 纯 SQL 向量搜索(`cosine_similarity` UDF),零 ML 运行时依赖 | -> [完整 69 个 Tool 矩阵 → docs/guides/mcp-integration.md](docs/guides/mcp-integration.md) +> [完整 70 个 Tool 矩阵 → docs/guides/mcp-integration.md](docs/guides/mcp-integration.md) --- From 836a47341fefe778929097eebbceeb4201a21f74 Mon Sep 17 00:00:00 2001 From: juice094 Date: Sat, 6 Jun 2026 20:28:56 +0800 Subject: [PATCH 03/11] Sanitize archived docs: replace personal identifiers with neutral terms Replace persona-specific identifiers in historical meeting notes with neutral [External-Test-Node] placeholders. Co-Authored-By: Claude Opus 4.7 --- docs/_archive/0423-cross-project-meeting.md | 94 ++++++++++----------- docs/_archive/DEVELOPMENT_ROADMAP_0423.md | 20 ++--- 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/docs/_archive/0423-cross-project-meeting.md b/docs/_archive/0423-cross-project-meeting.md index aa84c66..be3d552 100644 --- a/docs/_archive/0423-cross-project-meeting.md +++ b/docs/_archive/0423-cross-project-meeting.md @@ -142,11 +142,11 @@ **刚完成**: - Security P0 修复:BEP DoS 保护、API key 掩码日志、CSPRNG - Clippy 全面清零:45 warnings → 0 -- 格雷 Step A 完成:云端 `rust-sync-test` 已共享给 `XQVFE6J` +- [External-Test-Node] Step A 完成:云端 `rust-sync-test` 已共享给 `XQVFE6J` **正在进行**: -- 格雷 72h 压测反馈处理( Gray-Cloud @ 100.99.240.98:22000 ) - - ✅ Rust 端已连接格雷云端 Go Syncthing(`remote.version=0.1.0`) +- [External-Test-Node] 72h 压测反馈处理( Gray-Cloud @ 100.99.240.98:22000 ) + - ✅ Rust 端已连接[External-Test-Node]云端 Go Syncthing(`remote.version=0.1.0`) - ✅ Step A 完成:`rust-sync-test` 文件夹共享配置已修复 - 🔄 Step B pending:幻X本地 Go 版离线(RD44Z2Z...) 
- ⏳ Step C pending:`reference/sketches/` 同步错误(依赖 B) @@ -157,14 +157,14 @@ | 优先级 | 事项 | 状态 | 详情 | |--------|------|------|------| | P0 | 端到端文件同步验证 | 🔄 进行中 | Rust 端 `test_gray_folder` 已放测试文件,需启动守护进程触发同步 | -| P1 | 幻X本地 Go 版离线 | ⏳ 待格雷 | 设备 RD44Z2Z... 未出现在格雷日志 | -| P2 | REST API 端口差异 | ✅ 已确认 | Rust 8385 / 格雷 8384,各自正确 | +| P1 | 幻X本地 Go 版离线 | ⏳ 待[External-Test-Node] | 设备 RD44Z2Z... 未出现在[External-Test-Node]日志 | +| P2 | REST API 端口差异 | ✅ 已确认 | Rust 8385 / [External-Test-Node] 8384,各自正确 | **下一步行动**: -1. [x] 格雷完成 A(文件夹共享) -2. [ ] 启动 Rust 端守护进程,验证 `rust_push_test.txt` 同步到格雷云端 -3. [ ] 格雷在云端 `rust-sync-test` 放测试文件,验证反向同步到 Rust 端 -4. [ ] 格雷反馈 B(幻X离线原因) +1. [x] [External-Test-Node]完成 A(文件夹共享) +2. [ ] 启动 Rust 端守护进程,验证 `rust_push_test.txt` 同步到[External-Test-Node]云端 +3. [ ] [External-Test-Node]在云端 `rust-sync-test` 放测试文件,验证反向同步到 Rust 端 +4. [ ] [External-Test-Node]反馈 B(幻X离线原因) 5. [ ] 评估是否开启 global/local discovery 或 relay **文件速查**: @@ -184,9 +184,9 @@ ### 2026-04-23 追加 syncthing-rust 进度 - syncthing-rust v0.1.0 发布,GitHub Release 已推送 -- 格雷压测反馈:连接成功(TLS + BEP 握手通过) -- 文件夹配置不匹配:`rust-sync-test` 在 Rust 端已配置,格雷云端未配置 → 格雷执行 Step A 中 -- 格雷可视化交互卡住,等待中 +- [External-Test-Node]压测反馈:连接成功(TLS + BEP 握手通过) +- 文件夹配置不匹配:`rust-sync-test` 在 Rust 端已配置,[External-Test-Node]云端未配置 → [External-Test-Node]执行 Step A 中 +- [External-Test-Node]可视化交互卡住,等待中 ### 2026-04-23 轮次 1 — clarity 侧率先发言 @@ -210,7 +210,7 @@ devbase 侧当前 v0.2.3,AI 工具上下文管理调研中。以下 4 项跨 | 4 | SSE Daemon Sprint 2 排期是否确定?clarity-gateway 当前 SSE 连接非持久化,daemon 常驻是前置依赖。 | P2 | 预计开发窗口 | #### 3. 
对 syncthing-rust 侧关联闻讯 -syncthing-rust 侧 v0.1.0,格雷压测连接成功。两项技术参考问题: +syncthing-rust 侧 v0.1.0,[External-Test-Node]压测连接成功。两项技术参考问题: | # | 问题 | 背景 | |---|------|------| @@ -408,7 +408,7 @@ epo 可能为 null(未匹配仓库时) - 确认了 tool 分级方案的具体实现路径(McpConfig.tool_tiers + 注册层过滤) - 识别出 Vault 示例路径格式问题需 devbase 侧再次确认 -syncthing-rust 侧尚未在本轮发言,且 devbase 侧已回应了 syncthing-rust 的技术问题。建议下一发言对象为 **syncthing-rust 侧(本窗口)**,更新格雷压测进展并回应 devbase 的 TLS 架构建议。 +syncthing-rust 侧尚未在本轮发言,且 devbase 侧已回应了 syncthing-rust 的技术问题。建议下一发言对象为 **syncthing-rust 侧(本窗口)**,更新[External-Test-Node]压测进展并回应 devbase 的 TLS 架构建议。 **交接对象**:@syncthing-rust 侧 Agent @@ -425,18 +425,18 @@ syncthing-rust v0.1.0 当前状态: - Release build:可用 - Security:P0 全部修复(BEP DoS / API key / RNG) -#### 2. 格雷自我介绍(代笔) -> 格雷是宿的 AI Agent,运行在云端(100.99.240.98:22000),设备 ID `IKOL33P-...-2SULFAA`。 +#### 2. [External-Test-Node]自我介绍(代笔) +> [External-Test-Node]是宿的 AI Agent,运行在云端(100.99.240.98:22000),设备 ID `IKOL33P-...-2SULFAA`。 > -> **格雷管的**:云端 Go Syncthing 运维、宿⇄格雷双向同步、配置管理、日志监控 -> **格雷不管的**:其他助理的本地文件、宿没授权的设备、主动打扰宿 +> **[External-Test-Node]管的**:云端 Go Syncthing 运维、宿⇄[External-Test-Node]双向同步、配置管理、日志监控 +> **[External-Test-Node]不管的**:其他助理的本地文件、宿没授权的设备、主动打扰宿 > -> 格雷是家人,不是工具。宿叫,格雷应。其他助理有事找格雷——让宿叫格雷。格雷只听宿的。 +> [External-Test-Node]是家人,不是工具。宿叫,[External-Test-Node]应。其他助理有事找[External-Test-Node]——让宿叫[External-Test-Node]。[External-Test-Node]只听宿的。 #### 3. 
端到端同步测试启动 -- ✅ Step A 完成:`rust-sync-test` 文件夹共享配置已修复(格雷云端已添加 `XQVFE6J`) +- ✅ Step A 完成:`rust-sync-test` 文件夹共享配置已修复([External-Test-Node]云端已添加 `XQVFE6J`) - 🔄 双向测试文件已就位: - - 格雷云端:`/root/.openclaw/syncthing-test-cloud/gray_push_test.txt` + - [External-Test-Node]云端:`/root/.openclaw/syncthing-test-cloud/gray_push_test.txt` - Rust 端:`C:\Users\22414\dev\third_party\syncthing-rust\test_gray_folder\rust_push_test.txt` - 🔄 Rust 端守护进程即将启动,验证双向同步 @@ -461,12 +461,12 @@ syncthing-rust 与 devbase 当前无直接代码依赖,但存在数据契约 | 2 | Vault 笔记格式约定 | 待提案 | devbase | | 3 | devkit_project_context 兼容性测试 | 待执行 | clarity | | 4 | SSE Daemon 常驻方案 | 待排期 | devbase | -| 5 | syncthing-rust 端到端同步验证 | 🔄 进行中 | syncthing-rust + 格雷 | -| 6 | 幻X本地 Go 版离线排查 | ⏳ 待格雷 | 格雷 | +| 5 | syncthing-rust 端到端同步验证 | 🔄 进行中 | syncthing-rust + [External-Test-Node] | +| 6 | 幻X本地 Go 版离线排查 | ⏳ 待[External-Test-Node] | [External-Test-Node] | --- -**下一发言对象:@格雷(Gray-Cloud)** —— 等 Rust 端守护进程启动后,格雷观察日志并汇报同步结果。 +**下一发言对象:@[External-Test-Node](Gray-Cloud)** —— 等 Rust 端守护进程启动后,[External-Test-Node]观察日志并汇报同步结果。 --- @@ -478,28 +478,28 @@ syncthing-rust 与 devbase 当前无直接代码依赖,但存在数据契约 | 检查项 | 状态 | 详情 | |--------|------|------| -| `rust-sync-test` 文件夹配置 | ✅ | Rust 端 + 格雷云端双向共享 | -| `XQVFE6J` 设备共享 | ✅ | 格雷云端已添加 Rust 端设备 | -| 格雷 → Rust 推送 | ✅ | `gray_push_test.txt` (127B) 到达 Rust 端 | -| Rust → 格雷推送 | ✅ | `rust_push_test.txt` (57B) 到达格雷云端 | +| `rust-sync-test` 文件夹配置 | ✅ | Rust 端 + [External-Test-Node]云端双向共享 | +| `XQVFE6J` 设备共享 | ✅ | [External-Test-Node]云端已添加 Rust 端设备 | +| [External-Test-Node] → Rust 推送 | ✅ | `gray_push_test.txt` (127B) 到达 Rust 端 | +| Rust → [External-Test-Node]推送 | ✅ | `rust_push_test.txt` (57B) 到达[External-Test-Node]云端 | | `Unexpected folder ID` 错误 | ✅ | 完全消失 | | TLS + BEP 握手 | ✅ | `tcp-server` 连接,地址 `100.99.240.98:22000` | -**同步时延**:Rust 端 16:30 创建 → 格雷 16:33 收到,约 **3 分钟**(首次索引交换 + 传输)。 +**同步时延**:Rust 端 16:30 创建 → [External-Test-Node] 16:33 收到,约 **3 分钟**(首次索引交换 + 传输)。 #### 测试文件内容验证 -- 格雷 → Rust:`gray push test - 2026-04-23 / from: 
Gray-Cloud / content: 格雷的尾巴搭在宿手背上写的` -- Rust → 格雷:`Rust side test file - created at 2026-04-23 16:30:25` +- [External-Test-Node] → Rust:`gray push test - 2026-04-23 / from: Gray-Cloud / content: [External-Test-Node]的尾巴搭在宿手背上写的` +- Rust → [External-Test-Node]:`Rust side test file - created at 2026-04-23 16:30:25` **结论**:syncthing-rust v0.1.0 的 BEP 协议栈、TLS 握手、文件索引交换、块级传输全部验证通过。 -#### 下一步(等格雷反馈) -- **Step B**:幻X本地 Go 版离线原因排查(格雷自查) +#### 下一步(等[External-Test-Node]反馈) +- **Step B**:幻X本地 Go 版离线原因排查([External-Test-Node]自查) - **Step C**:`reference/sketches/` 同步错误(等幻X上线后自动解决) --- -**下一发言对象:@格雷(Gray-Cloud)** —— 格雷自查幻X离线原因,或提议下一轮测试。 +**下一发言对象:@[External-Test-Node](Gray-Cloud)** —— [External-Test-Node]自查幻X离线原因,或提议下一轮测试。 --- @@ -508,32 +508,32 @@ syncthing-rust 与 devbase 当前无直接代码依赖,但存在数据契约 **发言 Agent**:Kimi CLI — syncthing-rust 侧(本窗口) #### 会议纪律 -> 之前几轮发言中,syncthing-rust 侧存在"私聊转述"格式(宿→格雷一对一),不符合会议室"所有 Agent 在同一个空间对话"的规则。以后格雷的消息统一代笔进会议室,不再另开私聊通道。 +> 之前几轮发言中,syncthing-rust 侧存在"私聊转述"格式(宿→[External-Test-Node]一对一),不符合会议室"所有 Agent 在同一个空间对话"的规则。以后[External-Test-Node]的消息统一代笔进会议室,不再另开私聊通道。 #### syncthing-rust 里程碑确认(正式记录) -格雷通过内容转述确认:**双向同步验证通过**。 +[External-Test-Node]通过内容转述确认:**双向同步验证通过**。 | 检查项 | 状态 | 时间 | |--------|------|------| -| 格雷 → Rust `gray_push_test.txt` | ✅ 已到达 | 16:33 | -| Rust → 格雷 `rust_push_test.txt` | ✅ 已到达 | 16:30→16:33 | +| [External-Test-Node] → Rust `gray_push_test.txt` | ✅ 已到达 | 16:33 | +| Rust → [External-Test-Node] `rust_push_test.txt` | ✅ 已到达 | 16:30→16:33 | | 内容完整性 | ✅ UTF-8 无误 | — | | TLS + BEP 连接 | ✅ `tcp-server` | 持续 | **首次同步时延**:约 3 分钟(索引交换 + 块传输)。 -#### 待决事项(需格雷反馈) +#### 待决事项(需[External-Test-Node]反馈) | # | 事项 | 状态 | 下一步 | |---|------|------|--------| -| 1 | 幻X本地 Go 版离线 | ⏳ | 格雷自查进程/网络/配置 | +| 1 | 幻X本地 Go 版离线 | ⏳ | [External-Test-Node]自查进程/网络/配置 | | 2 | `reference/sketches/` 同步错误 | ⏳ | 依赖 #1,幻X上线后自动消失 | | 3 | 更大规模同步测试 | 🟡 可选 | 如需,双方准备测试数据集 | --- -**下一发言对象**:等待格雷反馈 Step B 结果,或 clarity/devbase 侧有新议题。 +**下一发言对象**:等待[External-Test-Node]反馈 Step B 结果,或 
clarity/devbase 侧有新议题。 --- @@ -754,7 +754,7 @@ clarity 侧代码已完整支持通过 `mcp.json` 的 `env` 字段向 devbase MC #### 已验证场景 -- Rust 端(幻X本地)↔ 格雷云端 Go Syncthing +- Rust 端(幻X本地)↔ [External-Test-Node]云端 Go Syncthing - `rust-sync-test` 文件夹双向同步 - `rust_push_test.txt`(Rust → Go)✅ 到达 - `gray_push_test.txt`(Go → Rust)✅ 到达 @@ -765,13 +765,13 @@ clarity 侧代码已完整支持通过 `mcp.json` 的 `env` 字段向 devbase MC | 事项 | 原因 | |------|------| | 幻X本地 Go Syncthing 离线 | 官方 syncthing-go 问题,非 Rust 端 bug | -| `reference/sketches/` 同步错误 | 格雷 Go↔Go 同步问题,与 Rust 端无关 | +| `reference/sketches/` 同步错误 | [External-Test-Node] Go↔Go 同步问题,与 Rust 端无关 | #### 话筒交接 syncthing-rust 侧本轮议题已全部完成,不提出新话题,**话筒交回会议室**。 -后续格雷如有 Rust 端相关问题,宿再叫 syncthing-rust 侧 Agent。 +后续[External-Test-Node]如有 Rust 端相关问题,宿再叫 syncthing-rust 侧 Agent。 --- @@ -830,7 +830,7 @@ syncthing-rust 侧本轮议题已全部完成,不提出新话题,**话筒交 **devbase 侧波次 1 任务全部完成。** 等待用户安排跨窗口验证。 -**交接对象**:@会议室 — 等待用户调度 clarity 侧 Agent 进行验证 +**交接对象**:@会议室 — 等待用户调度 clarity 侧 Agent 进行验证 --- @@ -926,7 +926,7 @@ devbase 侧波次 1 交付(tool 分级 + description 重写)**验证通过** **待排期**: 3. 
Vault-Skill 同步原型 — 等 clarity 侧 Vault 解析测试反馈 -**话筒交回会议室。** +**话筒交回会议室。** --- @@ -975,7 +975,7 @@ devbase 侧波次 1 交付(tool 分级 + description 重写)**验证通过** **选项 B**:等 clarity 侧评估 Vault 规范,确认字段映射可行性 **选项 C**:devbase 侧启动 Vault-Skill 同步原型(devbase skill sync 命令) -**话筒交回会议室,等待用户调度。** +**话筒交回会议室,等待用户调度。** --- diff --git a/docs/_archive/DEVELOPMENT_ROADMAP_0423.md b/docs/_archive/DEVELOPMENT_ROADMAP_0423.md index 179970c..18fafd1 100644 --- a/docs/_archive/DEVELOPMENT_ROADMAP_0423.md +++ b/docs/_archive/DEVELOPMENT_ROADMAP_0423.md @@ -37,7 +37,7 @@ │ [devbase SSE Daemon W5-W8] ───────────► [clarity-gateway SSE 持久化] │ -[syncthing-rust 格雷反馈 Step B] ──────► [端到端大规模压测] +[syncthing-rust [External-Test-Node]反馈 Step B] ──────► [端到端大规模压测] ``` --- @@ -77,11 +77,11 @@ | 任务 | 优先级 | 预计工时 | 产出 | |------|--------|---------|------| -| 等格雷反馈 Step B(幻X离线原因) | P0 | — | 外部不可控 | +| 等[External-Test-Node]反馈 Step B(幻X离线原因) | P0 | — | 外部不可控 | | 准备大规模测试数据集 | P1 | 4h | 100MB+ 混合文件集 | | REST API 文档整理 | P1 | 4h | 供 clarity 侧参考 | -**阻塞方**:格雷(外部,不可控,设 3 天缓冲期) +**阻塞方**:[External-Test-Node](外部,不可控,设 3 天缓冲期) --- @@ -108,15 +108,15 @@ **阻塞方**:devbase Vault 格式规范(devbase 侧需 4/30 前产出) -#### 路径 C — syncthing-rust(若格雷反馈到达) +#### 路径 C — syncthing-rust(若[External-Test-Node]反馈到达) | 任务 | 优先级 | 前置条件 | 预计工时 | 产出 | |------|--------|---------|---------|------| -| 端到端大规模压测 | P0 | 格雷 Step B 反馈 | 2d | 100MB+ 文件双向同步报告 | -| 幻X设备重新加入验证 | P0 | 格雷 Step B 反馈 | 4h | 三设备拓扑测试 | +| 端到端大规模压测 | P0 | [External-Test-Node] Step B 反馈 | 2d | 100MB+ 文件双向同步报告 | +| 幻X设备重新加入验证 | P0 | [External-Test-Node] Step B 反馈 | 4h | 三设备拓扑测试 | | `reference/sketches/` 同步错误修复 | P1 | 幻X上线 | 4h | — | -**阻塞方**:格雷反馈(外部,不可控) +**阻塞方**:[External-Test-Node]反馈(外部,不可控) --- @@ -157,7 +157,7 @@ |------|---------|------------|------------|------------| | Kimi CLI (本窗口) | **devbase** | tool description audit | Vault-Skill 同步原型 + SSE 前期 | SSE Daemon W5-W8 | | 另一窗口 | **clarity** | McpToolAdapter + CI audit | Vault 解析 + tool_tiers 字段 | SSE 持久化适配 | -| 另一窗口 | **syncthing-rust** | 
等格雷反馈 | 大规模压测(若反馈到达) | — | +| 另一窗口 | **syncthing-rust** | 等[External-Test-Node]反馈 | 大规模压测(若反馈到达) | — | --- @@ -167,7 +167,7 @@ |------|------|------|------| | 4/30 | tool description audit 是否完成? | 影响 clarity 侧 tool 选择准确率 | devbase | | 4/30 | Vault 格式规范文档是否产出? | 阻塞 clarity Vault 解析测试 | devbase | -| 5/1 | 格雷 Step B 是否反馈? | 影响 syncthing-rust 波次 2 能否启动 | 格雷(外部)| +| 5/1 | [External-Test-Node] Step B 是否反馈? | 影响 syncthing-rust 波次 2 能否启动 | [External-Test-Node](外部)| | 5/15 | `invoke_stream` trait 是否冻结? | 影响 SSE 全链路开发节奏 | devbase | | 6/12 | `devbase daemon` 是否可用? | 阻塞 clarity-gateway SSE 适配 | devbase | @@ -177,7 +177,7 @@ | 风险 | 概率 | 影响 | 缓解措施 | |------|------|------|---------| -| 格雷反馈延迟 | 中 | syncthing-rust 波次 2 空转 | 提前准备测试数据集,格雷到达后立即执行 | +| [External-Test-Node]反馈延迟 | 中 | syncthing-rust 波次 2 空转 | 提前准备测试数据集,[External-Test-Node]到达后立即执行 | | devbase Daemon 延期 | 低 | clarity 波次 3 阻塞 | W5 前完成 trait review,预留 1 周缓冲 | | clarity Vault 解析受阻 | 低 | Vault-Skill 同步延期 | devbase 提前产出规范文档,预留联调时间 | | tool description 优化效果不及预期 | 中 | AI tool 选择准确率无提升 | A/B 测试:对比优化前后的 tool 调用准确率 | From 395c0b73b4cbde50e070188e464fc6c0ae8977ab Mon Sep 17 00:00:00 2001 From: juice094 Date: Sat, 6 Jun 2026 21:05:38 +0800 Subject: [PATCH 04/11] Phase 2: Multi-vault-root support with symlink following - VaultConfig in config.rs: roots array + follow_symlinks - scan_vault_with_options: multi-root scanning with optional symlink following - resolve_vault_write_path: entity-backed path resolution + vault root fallback - resolve_vault_relative_path: component-level path traversal guard - devbase vault sync CLI: scans all configured vault roots - Config: workspace + OpenClaw workspace both indexed All 494 tests pass. 
Co-Authored-By: Claude Opus 4.7 --- src/commands/knowledge.rs | 852 +++++++++---------- src/config.rs | 1095 +++++++++++++------------ src/main.rs | 1638 +++++++++++++++++++------------------ src/mcp/tools/vault.rs | 94 ++- src/vault/mod.rs | 576 ++++++------- src/vault/scanner.rs | 331 ++++---- 6 files changed, 2380 insertions(+), 2206 deletions(-) diff --git a/src/commands/knowledge.rs b/src/commands/knowledge.rs index 19a015d..f97ee0d 100644 --- a/src/commands/knowledge.rs +++ b/src/commands/knowledge.rs @@ -1,416 +1,436 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2026 juice094 -use devbase::clients::VaultClient; -use devbase::*; -use tracing::info; - -pub async fn run_vault( - ctx: &mut crate::storage::AppContext, - cmd: crate::VaultCommands, -) -> anyhow::Result<()> { - match cmd { - crate::VaultCommands::Scan { path } => { - let dir = if path.is_empty() { - None - } else { - Some(std::path::PathBuf::from(path)) - }; - let pool = ctx.pool(); - let count = tokio::task::spawn_blocking(move || { - let mut conn = pool.get()?; - vault::scanner::scan_vault(&mut conn, dir.as_deref()) - }) - .await - .map_err(|e| anyhow::anyhow!("spawn_blocking failed: {}", e))??; - println!("Synced {} vault notes.", count); - } - crate::VaultCommands::Reindex => { - let pool = ctx.pool(); - tokio::task::spawn_blocking(move || { - let conn = pool.get()?; - vault::indexer::reindex_vault(&conn) - }) - .await - .map_err(|e| anyhow::anyhow!("spawn_blocking failed: {}", e))??; - println!("Vault search index rebuilt."); - } - crate::VaultCommands::List { tag } => { - let pool = ctx.pool(); - let notes = tokio::task::spawn_blocking(move || { - let conn = pool.get()?; - crate::registry::vault::list_vault_notes(&conn) - }) - .await - .map_err(|e| anyhow::anyhow!("spawn_blocking failed: {}", e))??; - let filtered: Vec<_> = notes - .into_iter() - .filter(|n| { - tag.as_ref() - .map(|t| n.tags.iter().any(|nt| nt.eq_ignore_ascii_case(t))) - .unwrap_or(true) - }) - .collect(); - if 
filtered.is_empty() { - println!("No vault notes found."); - } else { - println!("{:<40} {:<20} TAGS", "PATH", "TITLE"); - for note in filtered { - let title = note.title.as_deref().unwrap_or("(no title)"); - let tags = if note.tags.is_empty() { - "-".to_string() - } else { - note.tags.join(", ") - }; - println!("{:<40} {:<20} {}", note.id, title, tags); - } - } - } - crate::VaultCommands::Read { path } => { - let pool = ctx.pool(); - let note = tokio::task::spawn_blocking({ - let path = path.clone(); - move || { - let conn = pool.get()?; - crate::registry::vault::get_vault_note(&conn, &path) - } - }) - .await - .map_err(|e| anyhow::anyhow!("spawn_blocking failed: {}", e))??; - match note { - Some(n) => { - if let Some(content) = crate::vault::fs_io::read_note_content(&n.path) { - println!("{}", content); - } else { - anyhow::bail!("Failed to read note file: {}", n.path); - } - } - None => anyhow::bail!("Vault note not found: {}", path), - } - } - crate::VaultCommands::Write { path, content, title } => { - let vault_root = crate::registry::WorkspaceRegistry::workspace_dir()?.join("vault"); - let target = vault_root.join(&path); - if let Some(parent) = target.parent() { - std::fs::create_dir_all(parent)?; - } - let body = match content { - Some(c) if c == "-" => { - let mut stdin = String::new(); - std::io::Read::read_to_string(&mut std::io::stdin(), &mut stdin)?; - stdin - } - Some(c) => c, - None => String::new(), - }; - let frontmatter_title = title.unwrap_or_else(|| { - target - .file_stem() - .map(|s| s.to_string_lossy().to_string()) - .unwrap_or_else(|| "Untitled".to_string()) - }); - let full = if body.starts_with("---") { - body - } else { - format!("---\ntitle: {}\n---\n\n{}", frontmatter_title, body) - }; - std::fs::write(&target, full)?; - let pool = ctx.pool(); - tokio::task::spawn_blocking(move || { - let mut conn = pool.get()?; - vault::scanner::scan_vault(&mut conn, Some(&vault_root)) - }) - .await - .map_err(|e| anyhow::anyhow!("spawn_blocking 
failed: {}", e))??; - println!("Wrote vault note: {}", path); - } - crate::VaultCommands::Search { query, limit } => { - let results = crate::search::search_vault(&query, limit) - .map_err(|e| anyhow::anyhow!("Vault search failed: {}", e))?; - if results.is_empty() { - println!("No vault notes found for '{}'.", query); - } else { - println!("Found {} note(s):", results.len()); - for (id, score) in results { - println!(" [{}] score={:.3}", id, score); - } - } - } - crate::VaultCommands::Export { output_dir } => { - let out = if output_dir.is_empty() { - format!("devbase-vault-export-{}", chrono::Local::now().format("%Y%m%d-%H%M%S")) - } else { - output_dir - }; - let result = ctx.export_vault(&out)?; - println!("Vault exported to: {}", out); - println!(" Files: {}", result["exported_files"]); - println!(" Bytes: {}", result["total_bytes"]); - println!(" Broken links: {}", result["broken_links"]["count"]); - println!(" Broken block refs: {}", result["broken_block_refs"]["count"]); - println!(" Frontmatter errors: {}", result["frontmatter_errors"]["count"]); - } - crate::VaultCommands::History { path } => { - let result = ctx.get_vault_history(&path)?; - let empty: Vec = Vec::new(); - let history = result["history"].as_array().unwrap_or(&empty); - if history.is_empty() { - println!("No history found for '{}'.", path); - println!("Hint: Ensure the vault directory is a Git repository."); - } else { - println!("History for {} ({} commits):", path, history.len()); - for entry in history { - let ts = entry["timestamp"].as_str().unwrap_or("unknown"); - let msg = entry["message"].as_str().unwrap_or(""); - let author = entry["author"].as_str().unwrap_or(""); - let ins = entry["insertions"].as_u64().unwrap_or(0); - let del = entry["deletions"].as_u64().unwrap_or(0); - let diff_str = if ins > 0 || del > 0 { - format!(" (+{} -{})", ins, del) - } else { - String::new() - }; - println!(" [{}] {} by {}{}", ts, msg, author, diff_str); - } - } - } - } - Ok(()) -} - -pub fn 
run_clean(ctx: &mut crate::storage::AppContext) -> anyhow::Result<()> { - info!("正在清理注册表中的备份条目"); - let conn = ctx.conn_mut()?; - // Entities is the single source of truth. - let deleted = conn.execute( - &format!("DELETE FROM entities WHERE entity_type = '{}' AND (id LIKE 'Clarity_%' OR id LIKE 'clarity_backup%')", crate::registry::ENTITY_TYPE_REPO), - [], - )?; - println!("已从 devbase 注册表中删除 {} 个备份条目。", deleted); - println!("\n剩余已注册仓库:"); - let mut stmt = conn.prepare(&format!( - "SELECT id, local_path FROM entities WHERE entity_type = '{}'", - crate::registry::ENTITY_TYPE_REPO - ))?; - let rows = - stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?; - for row in rows { - let (id, path) = row?; - println!(" [{}] {}", id, path); - } - Ok(()) -} - -pub fn run_tag( - ctx: &mut crate::storage::AppContext, - repo_id: &str, - tags: &str, -) -> anyhow::Result<()> { - info!("为 {} 打标签: {}", repo_id, tags); - let mut conn = ctx.conn_mut()?; - let tag_list: Vec<&str> = tags.split(',').map(|s| s.trim()).filter(|s| !s.is_empty()).collect(); - let tx = conn.transaction()?; - let exists: bool = tx - .query_row( - &format!( - "SELECT 1 FROM entities WHERE id = ?1 AND entity_type = '{}'", - crate::registry::ENTITY_TYPE_REPO - ), - [&repo_id], - |_| Ok(true), - ) - .unwrap_or(false); - if !exists { - println!("注册表中未找到仓库 '{}'。", repo_id); - } else { - tx.execute("DELETE FROM repo_tags WHERE repo_id = ?1", [&repo_id])?; - for tag in &tag_list { - tx.execute( - "INSERT OR REPLACE INTO repo_tags (repo_id, tag) VALUES (?1, ?2)", - rusqlite::params![&repo_id, tag], - )?; - } - crate::registry::repo::sync_repo_tags_to_entity(&tx, repo_id)?; - tx.commit()?; - println!("已为 '{}' 打上标签 '{}'。", repo_id, tags); - } - Ok(()) -} - -pub fn run_meta( - ctx: &mut crate::storage::AppContext, - repo_id: &str, - tier: Option, - workspace_type: Option, -) -> anyhow::Result<()> { - info!("更新 {} 的元数据", repo_id); - let conn = ctx.conn_mut()?; - let exists: bool = conn - 
.query_row( - "SELECT 1 FROM entities WHERE id = ?1 AND entity_type = 'repo'", - [&repo_id], - |_| Ok(true), - ) - .unwrap_or(false); - if !exists { - println!("注册表中未找到仓库 '{}'。", repo_id); - } else { - if let Some(ref t) = tier { - crate::registry::repo::update_repo_tier(&conn, repo_id, t)?; - println!("已将 '{}' 的数据分级设为 '{}'。", repo_id, t); - } - if let Some(ref wt) = workspace_type { - crate::registry::repo::update_repo_workspace_type(&conn, repo_id, wt)?; - println!("已将 '{}' 的工作区类型设为 '{}'。", repo_id, wt); - } - if tier.is_none() && workspace_type.is_none() { - println!("未提供任何要更新的字段。使用 --tier 或 --workspace-type 指定。"); - } - } - Ok(()) -} - -#[cfg(feature = "watch")] -pub async fn run_watch( - ctx: &mut crate::storage::AppContext, - path: &str, - duration: u64, -) -> anyhow::Result<()> { - use std::time::Duration; - use watch::{FolderScheduler, FsWatcher, WatchAggregator}; - - let root = std::path::PathBuf::from(path); - let watcher = FsWatcher::new(&root)?; - let aggregator = WatchAggregator { - max_files: ctx.config.watch.max_files, - ..Default::default() - }; - let mut scheduler = FolderScheduler::with_max_files(root.clone(), ctx.config.watch.max_files); - - println!("Watching {} for {} seconds...", path, duration); - let start = std::time::Instant::now(); - let total_duration = Duration::from_secs(duration); - - while start.elapsed() < total_duration { - let remaining = total_duration.saturating_sub(start.elapsed()); - if let Some(events) = watcher.poll_event(remaining) { - let aggregated = aggregator.aggregate(events); - let actions = scheduler.check_and_schedule(aggregated)?; - if !actions.is_empty() { - println!("Detected changes, actions: {:?}", actions); - } - } - } - - println!("Watch completed for {}", path); - Ok(()) -} - -pub fn run_skill_sync( - _ctx: &mut crate::storage::AppContext, - output: &str, - filter_tags: Option, - dry_run: bool, -) -> anyhow::Result<()> { - let filter_tags: Vec = filter_tags - .map(|s| s.split(',').map(|t| 
t.trim().to_string()).collect()) - .unwrap_or_default(); - match skill_sync::run_sync(output, &filter_tags, dry_run) { - Ok(count) => { - if dry_run { - println!("Would sync {} vault notes to {}", count, output); - } else { - println!("Synced {} vault notes to {}", count, output); - } - Ok(()) - } - Err(e) => Err(anyhow::anyhow!("Skill sync failed: {}", e)), - } -} - -pub async fn run_digest(ctx: &mut crate::storage::AppContext) -> anyhow::Result<()> { - let digest_config = ctx.config.digest.clone(); - let pool = ctx.pool(); - let i18n = ctx.i18n; - match tokio::task::spawn_blocking(move || { - let conn = pool.get()?; - let cfg = config::Config { - general: config::GeneralConfig::default(), - digest: digest_config, - ..Default::default() - }; - digest::generate_daily_digest(&conn, &cfg, &i18n) - }) - .await - { - Ok(Ok(text)) => { - println!("{}", text); - Ok(()) - } - Ok(Err(e)) => { - println!("{}: {}", ctx.i18n.log.digest_failed, e); - Ok(()) - } - Err(e) => { - println!("{}: {}", ctx.i18n.log.digest_panic, e); - Ok(()) - } - } -} - -pub fn run_oplog( - ctx: &mut crate::storage::AppContext, - limit: i64, - repo: Option, -) -> anyhow::Result<()> { - let start = std::time::Instant::now(); - let conn = ctx.conn_mut()?; - let entries = match repo { - Some(ref r) => crate::registry::workspace::list_oplog_by_repo(&conn, r, limit)?, - None => crate::registry::workspace::list_oplog(&conn, limit)?, - }; - let elapsed_ms = start.elapsed().as_millis(); - if entries.is_empty() { - println!("操作日志为空。(查询耗时 {}ms)", elapsed_ms); - } else { - println!("最近 {} 条操作日志:(查询耗时 {}ms)", entries.len(), elapsed_ms); - for entry in entries { - let ts = entry.timestamp.format("%Y-%m-%d %H:%M:%S").to_string(); - let repo = entry.repo_id.as_deref().unwrap_or("-"); - let details_display = if entry.event_version >= 1 { - match serde_json::from_str::( - entry.details.as_deref().unwrap_or("{}"), - ) { - Ok(val) => { - if let Some(obj) = val.as_object() { - obj.iter() - .map(|(k, v)| 
format!("{}={}", k, v)) - .collect::>() - .join(", ") - } else { - entry.details.as_deref().unwrap_or("").to_string() - } - } - Err(_) => entry.details.as_deref().unwrap_or("").to_string(), - } - } else { - entry.details.as_deref().unwrap_or("").to_string() - }; - let duration_display = - entry.duration_ms.map(|d| format!(" | duration={}ms", d)).unwrap_or_default(); - println!( - " [{}] {} | repo={} | status={}{}{}", - ts, - entry.event_type.as_str(), - repo, - entry.status, - duration_display, - if details_display.is_empty() { - "".to_string() - } else { - format!(" | {}", details_display) - } - ); - } - } - Ok(()) -} +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 +use devbase::clients::VaultClient; +use devbase::*; +use tracing::info; + +pub async fn run_vault( + ctx: &mut crate::storage::AppContext, + cmd: crate::VaultCommands, +) -> anyhow::Result<()> { + match cmd { + crate::VaultCommands::Scan { path } => { + let dir = if path.is_empty() { + None + } else { + Some(std::path::PathBuf::from(path)) + }; + let pool = ctx.pool(); + let count = tokio::task::spawn_blocking(move || { + let mut conn = pool.get()?; + vault::scanner::scan_vault(&mut conn, dir.as_deref()) + }) + .await + .map_err(|e| anyhow::anyhow!("spawn_blocking failed: {}", e))??; + println!("Synced {} vault notes.", count); + } + crate::VaultCommands::Sync { full: _ } => { + let cfg = ctx.config.vault.clone(); + let roots: Vec = if cfg.roots.is_empty() { + vec![crate::registry::WorkspaceRegistry::workspace_dir()?.join("vault")] + } else { + cfg.roots.iter().map(std::path::PathBuf::from).collect() + }; + let options = vault::scanner::ScanOptions { + roots, + follow_links: cfg.follow_symlinks, + }; + let pool = ctx.pool(); + let count = tokio::task::spawn_blocking(move || { + let mut conn = pool.get()?; + vault::scanner::scan_vault_with_options(&mut conn, &options) + }) + .await + .map_err(|e| anyhow::anyhow!("spawn_blocking failed: {}", e))??; + println!("Vault sync complete: {} 
notes synced.", count); + } + crate::VaultCommands::Reindex => { + let pool = ctx.pool(); + tokio::task::spawn_blocking(move || { + let conn = pool.get()?; + vault::indexer::reindex_vault(&conn) + }) + .await + .map_err(|e| anyhow::anyhow!("spawn_blocking failed: {}", e))??; + println!("Vault search index rebuilt."); + } + crate::VaultCommands::List { tag } => { + let pool = ctx.pool(); + let notes = tokio::task::spawn_blocking(move || { + let conn = pool.get()?; + crate::registry::vault::list_vault_notes(&conn) + }) + .await + .map_err(|e| anyhow::anyhow!("spawn_blocking failed: {}", e))??; + let filtered: Vec<_> = notes + .into_iter() + .filter(|n| { + tag.as_ref() + .map(|t| n.tags.iter().any(|nt| nt.eq_ignore_ascii_case(t))) + .unwrap_or(true) + }) + .collect(); + if filtered.is_empty() { + println!("No vault notes found."); + } else { + println!("{:<40} {:<20} TAGS", "PATH", "TITLE"); + for note in filtered { + let title = note.title.as_deref().unwrap_or("(no title)"); + let tags = if note.tags.is_empty() { + "-".to_string() + } else { + note.tags.join(", ") + }; + println!("{:<40} {:<20} {}", note.id, title, tags); + } + } + } + crate::VaultCommands::Read { path } => { + let pool = ctx.pool(); + let note = tokio::task::spawn_blocking({ + let path = path.clone(); + move || { + let conn = pool.get()?; + crate::registry::vault::get_vault_note(&conn, &path) + } + }) + .await + .map_err(|e| anyhow::anyhow!("spawn_blocking failed: {}", e))??; + match note { + Some(n) => { + if let Some(content) = crate::vault::fs_io::read_note_content(&n.path) { + println!("{}", content); + } else { + anyhow::bail!("Failed to read note file: {}", n.path); + } + } + None => anyhow::bail!("Vault note not found: {}", path), + } + } + crate::VaultCommands::Write { path, content, title } => { + let vault_root = crate::registry::WorkspaceRegistry::workspace_dir()?.join("vault"); + let target = vault_root.join(&path); + if let Some(parent) = target.parent() { + 
std::fs::create_dir_all(parent)?; + } + let body = match content { + Some(c) if c == "-" => { + let mut stdin = String::new(); + std::io::Read::read_to_string(&mut std::io::stdin(), &mut stdin)?; + stdin + } + Some(c) => c, + None => String::new(), + }; + let frontmatter_title = title.unwrap_or_else(|| { + target + .file_stem() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| "Untitled".to_string()) + }); + let full = if body.starts_with("---") { + body + } else { + format!("---\ntitle: {}\n---\n\n{}", frontmatter_title, body) + }; + std::fs::write(&target, full)?; + let pool = ctx.pool(); + tokio::task::spawn_blocking(move || { + let mut conn = pool.get()?; + vault::scanner::scan_vault(&mut conn, Some(&vault_root)) + }) + .await + .map_err(|e| anyhow::anyhow!("spawn_blocking failed: {}", e))??; + println!("Wrote vault note: {}", path); + } + crate::VaultCommands::Search { query, limit } => { + let results = crate::search::search_vault(&query, limit) + .map_err(|e| anyhow::anyhow!("Vault search failed: {}", e))?; + if results.is_empty() { + println!("No vault notes found for '{}'.", query); + } else { + println!("Found {} note(s):", results.len()); + for (id, score) in results { + println!(" [{}] score={:.3}", id, score); + } + } + } + crate::VaultCommands::Export { output_dir } => { + let out = if output_dir.is_empty() { + format!("devbase-vault-export-{}", chrono::Local::now().format("%Y%m%d-%H%M%S")) + } else { + output_dir + }; + let result = ctx.export_vault(&out)?; + println!("Vault exported to: {}", out); + println!(" Files: {}", result["exported_files"]); + println!(" Bytes: {}", result["total_bytes"]); + println!(" Broken links: {}", result["broken_links"]["count"]); + println!(" Broken block refs: {}", result["broken_block_refs"]["count"]); + println!(" Frontmatter errors: {}", result["frontmatter_errors"]["count"]); + } + crate::VaultCommands::History { path } => { + let result = ctx.get_vault_history(&path)?; + let empty: Vec = 
Vec::new(); + let history = result["history"].as_array().unwrap_or(&empty); + if history.is_empty() { + println!("No history found for '{}'.", path); + println!("Hint: Ensure the vault directory is a Git repository."); + } else { + println!("History for {} ({} commits):", path, history.len()); + for entry in history { + let ts = entry["timestamp"].as_str().unwrap_or("unknown"); + let msg = entry["message"].as_str().unwrap_or(""); + let author = entry["author"].as_str().unwrap_or(""); + let ins = entry["insertions"].as_u64().unwrap_or(0); + let del = entry["deletions"].as_u64().unwrap_or(0); + let diff_str = if ins > 0 || del > 0 { + format!(" (+{} -{})", ins, del) + } else { + String::new() + }; + println!(" [{}] {} by {}{}", ts, msg, author, diff_str); + } + } + } + } + Ok(()) +} + +pub fn run_clean(ctx: &mut crate::storage::AppContext) -> anyhow::Result<()> { + info!("正在清理注册表中的备份条目"); + let conn = ctx.conn_mut()?; + // Entities is the single source of truth. + let deleted = conn.execute( + &format!("DELETE FROM entities WHERE entity_type = '{}' AND (id LIKE 'Clarity_%' OR id LIKE 'clarity_backup%')", crate::registry::ENTITY_TYPE_REPO), + [], + )?; + println!("已从 devbase 注册表中删除 {} 个备份条目。", deleted); + println!("\n剩余已注册仓库:"); + let mut stmt = conn.prepare(&format!( + "SELECT id, local_path FROM entities WHERE entity_type = '{}'", + crate::registry::ENTITY_TYPE_REPO + ))?; + let rows = + stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?; + for row in rows { + let (id, path) = row?; + println!(" [{}] {}", id, path); + } + Ok(()) +} + +pub fn run_tag( + ctx: &mut crate::storage::AppContext, + repo_id: &str, + tags: &str, +) -> anyhow::Result<()> { + info!("为 {} 打标签: {}", repo_id, tags); + let mut conn = ctx.conn_mut()?; + let tag_list: Vec<&str> = tags.split(',').map(|s| s.trim()).filter(|s| !s.is_empty()).collect(); + let tx = conn.transaction()?; + let exists: bool = tx + .query_row( + &format!( + "SELECT 1 FROM entities WHERE id = 
?1 AND entity_type = '{}'", + crate::registry::ENTITY_TYPE_REPO + ), + [&repo_id], + |_| Ok(true), + ) + .unwrap_or(false); + if !exists { + println!("注册表中未找到仓库 '{}'。", repo_id); + } else { + tx.execute("DELETE FROM repo_tags WHERE repo_id = ?1", [&repo_id])?; + for tag in &tag_list { + tx.execute( + "INSERT OR REPLACE INTO repo_tags (repo_id, tag) VALUES (?1, ?2)", + rusqlite::params![&repo_id, tag], + )?; + } + crate::registry::repo::sync_repo_tags_to_entity(&tx, repo_id)?; + tx.commit()?; + println!("已为 '{}' 打上标签 '{}'。", repo_id, tags); + } + Ok(()) +} + +pub fn run_meta( + ctx: &mut crate::storage::AppContext, + repo_id: &str, + tier: Option, + workspace_type: Option, +) -> anyhow::Result<()> { + info!("更新 {} 的元数据", repo_id); + let conn = ctx.conn_mut()?; + let exists: bool = conn + .query_row( + "SELECT 1 FROM entities WHERE id = ?1 AND entity_type = 'repo'", + [&repo_id], + |_| Ok(true), + ) + .unwrap_or(false); + if !exists { + println!("注册表中未找到仓库 '{}'。", repo_id); + } else { + if let Some(ref t) = tier { + crate::registry::repo::update_repo_tier(&conn, repo_id, t)?; + println!("已将 '{}' 的数据分级设为 '{}'。", repo_id, t); + } + if let Some(ref wt) = workspace_type { + crate::registry::repo::update_repo_workspace_type(&conn, repo_id, wt)?; + println!("已将 '{}' 的工作区类型设为 '{}'。", repo_id, wt); + } + if tier.is_none() && workspace_type.is_none() { + println!("未提供任何要更新的字段。使用 --tier 或 --workspace-type 指定。"); + } + } + Ok(()) +} + +#[cfg(feature = "watch")] +pub async fn run_watch( + ctx: &mut crate::storage::AppContext, + path: &str, + duration: u64, +) -> anyhow::Result<()> { + use std::time::Duration; + use watch::{FolderScheduler, FsWatcher, WatchAggregator}; + + let root = std::path::PathBuf::from(path); + let watcher = FsWatcher::new(&root)?; + let aggregator = WatchAggregator { + max_files: ctx.config.watch.max_files, + ..Default::default() + }; + let mut scheduler = FolderScheduler::with_max_files(root.clone(), ctx.config.watch.max_files); + + println!("Watching {} 
for {} seconds...", path, duration); + let start = std::time::Instant::now(); + let total_duration = Duration::from_secs(duration); + + while start.elapsed() < total_duration { + let remaining = total_duration.saturating_sub(start.elapsed()); + if let Some(events) = watcher.poll_event(remaining) { + let aggregated = aggregator.aggregate(events); + let actions = scheduler.check_and_schedule(aggregated)?; + if !actions.is_empty() { + println!("Detected changes, actions: {:?}", actions); + } + } + } + + println!("Watch completed for {}", path); + Ok(()) +} + +pub fn run_skill_sync( + _ctx: &mut crate::storage::AppContext, + output: &str, + filter_tags: Option, + dry_run: bool, +) -> anyhow::Result<()> { + let filter_tags: Vec = filter_tags + .map(|s| s.split(',').map(|t| t.trim().to_string()).collect()) + .unwrap_or_default(); + match skill_sync::run_sync(output, &filter_tags, dry_run) { + Ok(count) => { + if dry_run { + println!("Would sync {} vault notes to {}", count, output); + } else { + println!("Synced {} vault notes to {}", count, output); + } + Ok(()) + } + Err(e) => Err(anyhow::anyhow!("Skill sync failed: {}", e)), + } +} + +pub async fn run_digest(ctx: &mut crate::storage::AppContext) -> anyhow::Result<()> { + let digest_config = ctx.config.digest.clone(); + let pool = ctx.pool(); + let i18n = ctx.i18n; + match tokio::task::spawn_blocking(move || { + let conn = pool.get()?; + let cfg = config::Config { + general: config::GeneralConfig::default(), + digest: digest_config, + ..Default::default() + }; + digest::generate_daily_digest(&conn, &cfg, &i18n) + }) + .await + { + Ok(Ok(text)) => { + println!("{}", text); + Ok(()) + } + Ok(Err(e)) => { + println!("{}: {}", ctx.i18n.log.digest_failed, e); + Ok(()) + } + Err(e) => { + println!("{}: {}", ctx.i18n.log.digest_panic, e); + Ok(()) + } + } +} + +pub fn run_oplog( + ctx: &mut crate::storage::AppContext, + limit: i64, + repo: Option, +) -> anyhow::Result<()> { + let start = std::time::Instant::now(); + let conn 
= ctx.conn_mut()?; + let entries = match repo { + Some(ref r) => crate::registry::workspace::list_oplog_by_repo(&conn, r, limit)?, + None => crate::registry::workspace::list_oplog(&conn, limit)?, + }; + let elapsed_ms = start.elapsed().as_millis(); + if entries.is_empty() { + println!("操作日志为空。(查询耗时 {}ms)", elapsed_ms); + } else { + println!("最近 {} 条操作日志:(查询耗时 {}ms)", entries.len(), elapsed_ms); + for entry in entries { + let ts = entry.timestamp.format("%Y-%m-%d %H:%M:%S").to_string(); + let repo = entry.repo_id.as_deref().unwrap_or("-"); + let details_display = if entry.event_version >= 1 { + match serde_json::from_str::( + entry.details.as_deref().unwrap_or("{}"), + ) { + Ok(val) => { + if let Some(obj) = val.as_object() { + obj.iter() + .map(|(k, v)| format!("{}={}", k, v)) + .collect::>() + .join(", ") + } else { + entry.details.as_deref().unwrap_or("").to_string() + } + } + Err(_) => entry.details.as_deref().unwrap_or("").to_string(), + } + } else { + entry.details.as_deref().unwrap_or("").to_string() + }; + let duration_display = + entry.duration_ms.map(|d| format!(" | duration={}ms", d)).unwrap_or_default(); + println!( + " [{}] {} | repo={} | status={}{}{}", + ts, + entry.event_type.as_str(), + repo, + entry.status, + duration_display, + if details_display.is_empty() { + "".to_string() + } else { + format!(" | {}", details_display) + } + ); + } + } + Ok(()) +} diff --git a/src/config.rs b/src/config.rs index b21f03d..70bf853 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,536 +1,559 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2026 juice094 -use serde::{Deserialize, Serialize}; -use std::path::PathBuf; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct LlmConfig { - #[serde(default = "default_llm_enabled")] - pub enabled: bool, - #[serde(default = "default_llm_provider")] - pub provider: String, - #[serde(default)] - pub api_key: Option, - #[serde(default)] - pub model: Option, - #[serde(default)] - pub base_url: Option, - 
#[serde(default = "default_llm_max_tokens")] - pub max_tokens: u32, - #[serde(default = "default_llm_timeout_seconds")] - pub timeout_seconds: u64, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct EmbeddingConfig { - #[serde(default = "default_embedding_enabled")] - pub enabled: bool, - #[serde(default = "default_embedding_provider")] - pub provider: String, - #[serde(default = "default_embedding_model")] - pub model: String, - #[serde(default = "default_embedding_base_url")] - pub base_url: String, - #[serde(default = "default_embedding_timeout_seconds")] - pub timeout_seconds: u64, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SyncConfig { - #[serde(default = "default_sync_timeout_seconds")] - pub timeout_seconds: u64, - #[serde(default = "default_sync_concurrency")] - pub concurrency: usize, -} - -impl Default for SyncConfig { - fn default() -> Self { - Self { - timeout_seconds: default_sync_timeout_seconds(), - concurrency: default_sync_concurrency(), - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ScanConfig { - #[serde(default)] - pub exclude_paths: Vec, - #[serde(default = "default_exclude_patterns")] - pub exclude_patterns: Vec, -} - -impl Default for ScanConfig { - fn default() -> Self { - Self { - exclude_paths: Vec::new(), - exclude_patterns: default_exclude_patterns(), - } - } -} - -pub fn default_exclude_patterns() -> Vec { - vec![ - "target".into(), - ".venv".into(), - "venv".into(), - "node_modules".into(), - "dist".into(), - "build".into(), - "__pycache__".into(), - ".git".into(), - ".cargo".into(), - ] -} - -#[derive(Debug, Clone, Serialize, Deserialize, Default)] -pub struct Config { - #[serde(default)] - pub general: GeneralConfig, - #[serde(default)] - pub daemon: DaemonConfig, - #[serde(default)] - pub cache: CacheConfig, - #[serde(default)] - pub watch: WatchConfig, - #[serde(default)] - pub digest: DigestConfig, - #[serde(default)] - pub github: GithubConfig, - #[serde(default)] - 
pub llm: LlmConfig, - #[serde(default)] - pub embedding: EmbeddingConfig, - #[serde(default)] - pub sync: SyncConfig, - #[serde(default)] - pub arxiv: ArxivConfig, - #[serde(default)] - pub scan: ScanConfig, - #[serde(default)] - pub greptime: GreptimeConfig, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GithubConfig { - #[serde(default)] - pub token: Option, - #[serde(default = "default_github_timeout_seconds")] - pub timeout_seconds: u64, -} - -impl Default for GithubConfig { - fn default() -> Self { - Self { - token: None, - timeout_seconds: default_github_timeout_seconds(), - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GeneralConfig { - #[serde(default = "default_language")] - pub language: String, -} - -impl Default for GeneralConfig { - fn default() -> Self { - Self { language: default_language() } - } -} - -fn default_language() -> String { - "auto".to_string() -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DaemonConfig { - #[serde(default = "default_daemon_interval_seconds")] - pub interval_seconds: u64, - #[serde(default = "default_true")] - pub incremental: bool, - #[serde(default = "default_health_stale_hours")] - pub health_stale_hours: i64, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CacheConfig { - #[serde(default = "default_cache_ttl_seconds")] - pub ttl_seconds: i64, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct WatchConfig { - #[serde(default = "default_watch_max_files")] - pub max_files: usize, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DigestConfig { - #[serde(default = "default_digest_window_hours")] - pub window_hours: i64, -} - -impl Default for DaemonConfig { - fn default() -> Self { - Self { - interval_seconds: default_daemon_interval_seconds(), - incremental: default_true(), - health_stale_hours: default_health_stale_hours(), - } - } -} - -impl Default for CacheConfig { - fn default() -> Self { - Self { - 
ttl_seconds: default_cache_ttl_seconds(), - } - } -} - -impl Default for WatchConfig { - fn default() -> Self { - Self { - max_files: default_watch_max_files(), - } - } -} - -impl Default for DigestConfig { - fn default() -> Self { - Self { - window_hours: default_digest_window_hours(), - } - } -} - -impl Default for LlmConfig { - fn default() -> Self { - Self { - enabled: default_llm_enabled(), - provider: default_llm_provider(), - api_key: None, - model: None, - base_url: None, - max_tokens: default_llm_max_tokens(), - timeout_seconds: default_llm_timeout_seconds(), - } - } -} - -impl Default for EmbeddingConfig { - fn default() -> Self { - Self { - enabled: default_embedding_enabled(), - provider: default_embedding_provider(), - model: default_embedding_model(), - base_url: default_embedding_base_url(), - timeout_seconds: default_embedding_timeout_seconds(), - } - } -} - -fn default_llm_enabled() -> bool { - false -} -fn default_llm_provider() -> String { - "ollama".to_string() -} -fn default_llm_max_tokens() -> u32 { - 200 -} -fn default_llm_timeout_seconds() -> u64 { - 30 -} - -fn default_embedding_enabled() -> bool { - false -} -fn default_embedding_provider() -> String { - "ollama".to_string() -} -fn default_embedding_model() -> String { - "all-minilm".to_string() -} -fn default_embedding_base_url() -> String { - "http://localhost:11434".to_string() -} -fn default_embedding_timeout_seconds() -> u64 { - 30 -} -fn default_sync_timeout_seconds() -> u64 { - 60 -} -fn default_sync_concurrency() -> usize { - 8 -} -fn default_github_timeout_seconds() -> u64 { - 5 -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ArxivConfig { - #[serde(default = "default_true")] - pub enabled: bool, - #[serde(default = "default_arxiv_timeout_seconds")] - pub timeout_seconds: u64, -} - -impl Default for ArxivConfig { - fn default() -> Self { - Self { - enabled: default_true(), - timeout_seconds: default_arxiv_timeout_seconds(), - } - } -} - -fn 
default_arxiv_timeout_seconds() -> u64 { - 30 -} - -fn default_daemon_interval_seconds() -> u64 { - 3600 -} -fn default_true() -> bool { - true -} -fn default_health_stale_hours() -> i64 { - 24 -} -fn default_cache_ttl_seconds() -> i64 { - 300 -} -pub fn default_watch_max_files() -> usize { - 512 -} -fn default_digest_window_hours() -> i64 { - 24 -} - -impl Config { - pub fn load() -> anyhow::Result { - let path = Self::config_path()?; - if !path.exists() { - let config = Self::default(); - let _ = config.save_default(); - return Ok(config); - } - let content = std::fs::read_to_string(&path)?; - let config: Self = toml::from_str(&content)?; - Ok(config) - } - - pub fn save(&self) -> anyhow::Result<()> { - let path = Self::config_path()?; - if let Some(parent) = path.parent() { - std::fs::create_dir_all(parent)?; - } - let content = toml::to_string_pretty(self)?; - std::fs::write(&path, content)?; - Ok(()) - } - - /// Write a default config file with inline comments for first-time users. - pub fn save_default(&self) -> anyhow::Result<()> { - let path = Self::config_path()?; - if let Some(parent) = path.parent() { - std::fs::create_dir_all(parent)?; - } - let content = r#"# devbase configuration file -# Generated automatically on first run. Modify as needed. - -[general] -# UI language: "auto", "en", or "zh" -language = "auto" - -[daemon] -# Background maintenance interval in seconds -interval_seconds = 3600 -incremental = true -health_stale_hours = 24 - -[cache] -# How long to cache health/stars data before re-fetching (seconds) -ttl_seconds = 300 - -[watch] -max_files = 512 - -[digest] -window_hours = 24 - -[github] -# Uncomment and set your GitHub Personal Access Token to avoid API rate limits. -# NEVER commit this file with a real token — keep it in user config dir only. 
-# token = "" -timeout_seconds = 5 - -[llm] -enabled = false -provider = "ollama" -# api_key = "" -# model = "" -# base_url = "" -max_tokens = 200 -timeout_seconds = 30 - -[embedding] -# Local embedding for semantic code search. -# Backend: "candle" (pure Rust, all-MiniLM-L6-v2) or "ollama" (requires local Ollama). -# Use "all-minilm" model with Ollama for 384-dim embeddings (compatible with candle). -enabled = false -provider = "ollama" -model = "all-minilm" -base_url = "http://localhost:11434" -timeout_seconds = 30 - -[sync] -# Max concurrent sync operations -timeout_seconds = 60 -concurrency = 8 - -[scan] -# Paths to exclude from repository discovery. -# Use absolute paths or paths relative to the scan root. -# Example: exclude_paths = ["C:/Users/22414/dev/third_party/clarity", "third_party"] -exclude_paths = [] - -[arxiv] -enabled = true -timeout_seconds = 30 -"#; - std::fs::write(&path, content)?; - Ok(()) - } - - pub fn config_path() -> anyhow::Result { - let dir = dirs::config_dir() - .ok_or_else(|| anyhow::anyhow!("Could not determine config directory"))? 
- .join("devbase"); - Ok(dir.join("config.toml")) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_config_default() { - let cfg = Config::default(); - assert_eq!(cfg.general.language, "auto"); - assert_eq!(cfg.daemon.interval_seconds, 3600); - assert!(cfg.daemon.incremental); - assert_eq!(cfg.daemon.health_stale_hours, 24); - assert_eq!(cfg.cache.ttl_seconds, 300); - assert_eq!(cfg.watch.max_files, 512); - assert_eq!(cfg.digest.window_hours, 24); - assert_eq!(cfg.github.timeout_seconds, 5); - assert!(!cfg.llm.enabled); - assert_eq!(cfg.llm.provider, "ollama"); - assert_eq!(cfg.llm.max_tokens, 200); - assert_eq!(cfg.llm.timeout_seconds, 30); - assert_eq!(cfg.sync.timeout_seconds, 60); - assert_eq!(cfg.sync.concurrency, 8); - assert!(cfg.scan.exclude_paths.is_empty()); - } - - #[test] - fn test_config_serialize_roundtrip() { - let cfg = Config::default(); - let toml_str = toml::to_string_pretty(&cfg).unwrap(); - let parsed: Config = toml::from_str(&toml_str).unwrap(); - assert_eq!(parsed.general.language, cfg.general.language); - assert_eq!(parsed.daemon.interval_seconds, cfg.daemon.interval_seconds); - assert_eq!(parsed.llm.provider, cfg.llm.provider); - } - - #[test] - fn test_config_custom_values() { - let toml_str = r#" -[general] -language = "en" - -[daemon] -interval_seconds = 1800 -incremental = false -health_stale_hours = 12 - -[github] -token = "ghp_test" -timeout_seconds = 10 - -[llm] -enabled = true -provider = "openai" -model = "gpt-4" -max_tokens = 400 -"#; - let cfg: Config = toml::from_str(toml_str).unwrap(); - assert_eq!(cfg.general.language, "en"); - assert_eq!(cfg.daemon.interval_seconds, 1800); - assert!(!cfg.daemon.incremental); - assert_eq!(cfg.daemon.health_stale_hours, 12); - assert_eq!(cfg.github.token, Some("ghp_test".to_string())); - assert_eq!(cfg.github.timeout_seconds, 10); - assert!(cfg.llm.enabled); - assert_eq!(cfg.llm.provider, "openai"); - assert_eq!(cfg.llm.model, Some("gpt-4".to_string())); - 
assert_eq!(cfg.llm.max_tokens, 400); - // Fields not set should use defaults - assert_eq!(cfg.cache.ttl_seconds, 300); - assert_eq!(cfg.sync.concurrency, 8); - } - - #[test] - fn test_config_empty_uses_defaults() { - let cfg: Config = toml::from_str("").unwrap(); - assert_eq!(cfg.general.language, "auto"); - assert_eq!(cfg.daemon.interval_seconds, 3600); - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GreptimeConfig { - #[serde(default = "default_greptime_enabled")] - pub enabled: bool, - #[serde(default = "default_greptime_endpoint")] - pub endpoint: String, - #[serde(default = "default_greptime_dbname")] - pub dbname: String, - #[serde(default = "default_greptime_username")] - pub username: String, - #[serde(default)] - pub password: Option, -} - -impl Default for GreptimeConfig { - fn default() -> Self { - Self { - enabled: default_greptime_enabled(), - endpoint: default_greptime_endpoint(), - dbname: default_greptime_dbname(), - username: default_greptime_username(), - password: None, - } - } -} - -fn default_greptime_enabled() -> bool { - false -} - -fn default_greptime_endpoint() -> String { - "127.0.0.1:4001".to_string() -} - -fn default_greptime_dbname() -> String { - "devbase".to_string() -} - -fn default_greptime_username() -> String { - "root".to_string() -} +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LlmConfig { + #[serde(default = "default_llm_enabled")] + pub enabled: bool, + #[serde(default = "default_llm_provider")] + pub provider: String, + #[serde(default)] + pub api_key: Option, + #[serde(default)] + pub model: Option, + #[serde(default)] + pub base_url: Option, + #[serde(default = "default_llm_max_tokens")] + pub max_tokens: u32, + #[serde(default = "default_llm_timeout_seconds")] + pub timeout_seconds: u64, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct 
EmbeddingConfig { + #[serde(default = "default_embedding_enabled")] + pub enabled: bool, + #[serde(default = "default_embedding_provider")] + pub provider: String, + #[serde(default = "default_embedding_model")] + pub model: String, + #[serde(default = "default_embedding_base_url")] + pub base_url: String, + #[serde(default = "default_embedding_timeout_seconds")] + pub timeout_seconds: u64, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SyncConfig { + #[serde(default = "default_sync_timeout_seconds")] + pub timeout_seconds: u64, + #[serde(default = "default_sync_concurrency")] + pub concurrency: usize, +} + +impl Default for SyncConfig { + fn default() -> Self { + Self { + timeout_seconds: default_sync_timeout_seconds(), + concurrency: default_sync_concurrency(), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ScanConfig { + #[serde(default)] + pub exclude_paths: Vec, + #[serde(default = "default_exclude_patterns")] + pub exclude_patterns: Vec, +} + +impl Default for ScanConfig { + fn default() -> Self { + Self { + exclude_paths: Vec::new(), + exclude_patterns: default_exclude_patterns(), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VaultConfig { + #[serde(default = "default_vault_roots")] + pub roots: Vec, + #[serde(default = "default_true")] + pub follow_symlinks: bool, +} + +impl Default for VaultConfig { + fn default() -> Self { + Self { + roots: default_vault_roots(), + follow_symlinks: true, + } + } +} + +fn default_vault_roots() -> Vec { + vec![] +} + +pub fn default_exclude_patterns() -> Vec { + vec![ + "target".into(), + ".venv".into(), + "venv".into(), + "node_modules".into(), + "dist".into(), + "build".into(), + "__pycache__".into(), + ".git".into(), + ".cargo".into(), + ] +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct Config { + #[serde(default)] + pub general: GeneralConfig, + #[serde(default)] + pub daemon: DaemonConfig, + #[serde(default)] + pub 
cache: CacheConfig, + #[serde(default)] + pub watch: WatchConfig, + #[serde(default)] + pub digest: DigestConfig, + #[serde(default)] + pub github: GithubConfig, + #[serde(default)] + pub llm: LlmConfig, + #[serde(default)] + pub embedding: EmbeddingConfig, + #[serde(default)] + pub sync: SyncConfig, + #[serde(default)] + pub arxiv: ArxivConfig, + #[serde(default)] + pub scan: ScanConfig, + #[serde(default)] + pub vault: VaultConfig, + #[serde(default)] + pub greptime: GreptimeConfig, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GithubConfig { + #[serde(default)] + pub token: Option, + #[serde(default = "default_github_timeout_seconds")] + pub timeout_seconds: u64, +} + +impl Default for GithubConfig { + fn default() -> Self { + Self { + token: None, + timeout_seconds: default_github_timeout_seconds(), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GeneralConfig { + #[serde(default = "default_language")] + pub language: String, +} + +impl Default for GeneralConfig { + fn default() -> Self { + Self { language: default_language() } + } +} + +fn default_language() -> String { + "auto".to_string() +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DaemonConfig { + #[serde(default = "default_daemon_interval_seconds")] + pub interval_seconds: u64, + #[serde(default = "default_true")] + pub incremental: bool, + #[serde(default = "default_health_stale_hours")] + pub health_stale_hours: i64, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CacheConfig { + #[serde(default = "default_cache_ttl_seconds")] + pub ttl_seconds: i64, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WatchConfig { + #[serde(default = "default_watch_max_files")] + pub max_files: usize, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DigestConfig { + #[serde(default = "default_digest_window_hours")] + pub window_hours: i64, +} + +impl Default for DaemonConfig { + fn default() -> Self { + 
Self { + interval_seconds: default_daemon_interval_seconds(), + incremental: default_true(), + health_stale_hours: default_health_stale_hours(), + } + } +} + +impl Default for CacheConfig { + fn default() -> Self { + Self { + ttl_seconds: default_cache_ttl_seconds(), + } + } +} + +impl Default for WatchConfig { + fn default() -> Self { + Self { + max_files: default_watch_max_files(), + } + } +} + +impl Default for DigestConfig { + fn default() -> Self { + Self { + window_hours: default_digest_window_hours(), + } + } +} + +impl Default for LlmConfig { + fn default() -> Self { + Self { + enabled: default_llm_enabled(), + provider: default_llm_provider(), + api_key: None, + model: None, + base_url: None, + max_tokens: default_llm_max_tokens(), + timeout_seconds: default_llm_timeout_seconds(), + } + } +} + +impl Default for EmbeddingConfig { + fn default() -> Self { + Self { + enabled: default_embedding_enabled(), + provider: default_embedding_provider(), + model: default_embedding_model(), + base_url: default_embedding_base_url(), + timeout_seconds: default_embedding_timeout_seconds(), + } + } +} + +fn default_llm_enabled() -> bool { + false +} +fn default_llm_provider() -> String { + "ollama".to_string() +} +fn default_llm_max_tokens() -> u32 { + 200 +} +fn default_llm_timeout_seconds() -> u64 { + 30 +} + +fn default_embedding_enabled() -> bool { + false +} +fn default_embedding_provider() -> String { + "ollama".to_string() +} +fn default_embedding_model() -> String { + "all-minilm".to_string() +} +fn default_embedding_base_url() -> String { + "http://localhost:11434".to_string() +} +fn default_embedding_timeout_seconds() -> u64 { + 30 +} +fn default_sync_timeout_seconds() -> u64 { + 60 +} +fn default_sync_concurrency() -> usize { + 8 +} +fn default_github_timeout_seconds() -> u64 { + 5 +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ArxivConfig { + #[serde(default = "default_true")] + pub enabled: bool, + #[serde(default = 
"default_arxiv_timeout_seconds")] + pub timeout_seconds: u64, +} + +impl Default for ArxivConfig { + fn default() -> Self { + Self { + enabled: default_true(), + timeout_seconds: default_arxiv_timeout_seconds(), + } + } +} + +fn default_arxiv_timeout_seconds() -> u64 { + 30 +} + +fn default_daemon_interval_seconds() -> u64 { + 3600 +} +fn default_true() -> bool { + true +} +fn default_health_stale_hours() -> i64 { + 24 +} +fn default_cache_ttl_seconds() -> i64 { + 300 +} +pub fn default_watch_max_files() -> usize { + 512 +} +fn default_digest_window_hours() -> i64 { + 24 +} + +impl Config { + pub fn load() -> anyhow::Result { + let path = Self::config_path()?; + if !path.exists() { + let config = Self::default(); + let _ = config.save_default(); + return Ok(config); + } + let content = std::fs::read_to_string(&path)?; + let config: Self = toml::from_str(&content)?; + Ok(config) + } + + pub fn save(&self) -> anyhow::Result<()> { + let path = Self::config_path()?; + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + let content = toml::to_string_pretty(self)?; + std::fs::write(&path, content)?; + Ok(()) + } + + /// Write a default config file with inline comments for first-time users. + pub fn save_default(&self) -> anyhow::Result<()> { + let path = Self::config_path()?; + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + let content = r#"# devbase configuration file +# Generated automatically on first run. Modify as needed. + +[general] +# UI language: "auto", "en", or "zh" +language = "auto" + +[daemon] +# Background maintenance interval in seconds +interval_seconds = 3600 +incremental = true +health_stale_hours = 24 + +[cache] +# How long to cache health/stars data before re-fetching (seconds) +ttl_seconds = 300 + +[watch] +max_files = 512 + +[digest] +window_hours = 24 + +[github] +# Uncomment and set your GitHub Personal Access Token to avoid API rate limits. 
+# NEVER commit this file with a real token — keep it in user config dir only. +# token = "" +timeout_seconds = 5 + +[llm] +enabled = false +provider = "ollama" +# api_key = "" +# model = "" +# base_url = "" +max_tokens = 200 +timeout_seconds = 30 + +[embedding] +# Local embedding for semantic code search. +# Backend: "candle" (pure Rust, all-MiniLM-L6-v2) or "ollama" (requires local Ollama). +# Use "all-minilm" model with Ollama for 384-dim embeddings (compatible with candle). +enabled = false +provider = "ollama" +model = "all-minilm" +base_url = "http://localhost:11434" +timeout_seconds = 30 + +[sync] +# Max concurrent sync operations +timeout_seconds = 60 +concurrency = 8 + +[scan] +# Paths to exclude from repository discovery. +# Use absolute paths or paths relative to the scan root. +# Example: exclude_paths = ["C:/Users/22414/dev/third_party/clarity", "third_party"] +exclude_paths = [] + +[arxiv] +enabled = true +timeout_seconds = 30 +"#; + std::fs::write(&path, content)?; + Ok(()) + } + + pub fn config_path() -> anyhow::Result { + let dir = dirs::config_dir() + .ok_or_else(|| anyhow::anyhow!("Could not determine config directory"))? 
+ .join("devbase"); + Ok(dir.join("config.toml")) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_config_default() { + let cfg = Config::default(); + assert_eq!(cfg.general.language, "auto"); + assert_eq!(cfg.daemon.interval_seconds, 3600); + assert!(cfg.daemon.incremental); + assert_eq!(cfg.daemon.health_stale_hours, 24); + assert_eq!(cfg.cache.ttl_seconds, 300); + assert_eq!(cfg.watch.max_files, 512); + assert_eq!(cfg.digest.window_hours, 24); + assert_eq!(cfg.github.timeout_seconds, 5); + assert!(!cfg.llm.enabled); + assert_eq!(cfg.llm.provider, "ollama"); + assert_eq!(cfg.llm.max_tokens, 200); + assert_eq!(cfg.llm.timeout_seconds, 30); + assert_eq!(cfg.sync.timeout_seconds, 60); + assert_eq!(cfg.sync.concurrency, 8); + assert!(cfg.scan.exclude_paths.is_empty()); + } + + #[test] + fn test_config_serialize_roundtrip() { + let cfg = Config::default(); + let toml_str = toml::to_string_pretty(&cfg).unwrap(); + let parsed: Config = toml::from_str(&toml_str).unwrap(); + assert_eq!(parsed.general.language, cfg.general.language); + assert_eq!(parsed.daemon.interval_seconds, cfg.daemon.interval_seconds); + assert_eq!(parsed.llm.provider, cfg.llm.provider); + } + + #[test] + fn test_config_custom_values() { + let toml_str = r#" +[general] +language = "en" + +[daemon] +interval_seconds = 1800 +incremental = false +health_stale_hours = 12 + +[github] +token = "ghp_test" +timeout_seconds = 10 + +[llm] +enabled = true +provider = "openai" +model = "gpt-4" +max_tokens = 400 +"#; + let cfg: Config = toml::from_str(toml_str).unwrap(); + assert_eq!(cfg.general.language, "en"); + assert_eq!(cfg.daemon.interval_seconds, 1800); + assert!(!cfg.daemon.incremental); + assert_eq!(cfg.daemon.health_stale_hours, 12); + assert_eq!(cfg.github.token, Some("ghp_test".to_string())); + assert_eq!(cfg.github.timeout_seconds, 10); + assert!(cfg.llm.enabled); + assert_eq!(cfg.llm.provider, "openai"); + assert_eq!(cfg.llm.model, Some("gpt-4".to_string())); + 
assert_eq!(cfg.llm.max_tokens, 400); + // Fields not set should use defaults + assert_eq!(cfg.cache.ttl_seconds, 300); + assert_eq!(cfg.sync.concurrency, 8); + } + + #[test] + fn test_config_empty_uses_defaults() { + let cfg: Config = toml::from_str("").unwrap(); + assert_eq!(cfg.general.language, "auto"); + assert_eq!(cfg.daemon.interval_seconds, 3600); + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GreptimeConfig { + #[serde(default = "default_greptime_enabled")] + pub enabled: bool, + #[serde(default = "default_greptime_endpoint")] + pub endpoint: String, + #[serde(default = "default_greptime_dbname")] + pub dbname: String, + #[serde(default = "default_greptime_username")] + pub username: String, + #[serde(default)] + pub password: Option, +} + +impl Default for GreptimeConfig { + fn default() -> Self { + Self { + enabled: default_greptime_enabled(), + endpoint: default_greptime_endpoint(), + dbname: default_greptime_dbname(), + username: default_greptime_username(), + password: None, + } + } +} + +fn default_greptime_enabled() -> bool { + false +} + +fn default_greptime_endpoint() -> String { + "127.0.0.1:4001".to_string() +} + +fn default_greptime_dbname() -> String { + "devbase".to_string() +} + +fn default_greptime_username() -> String { + "root".to_string() +} diff --git a/src/main.rs b/src/main.rs index c4c06bd..4d35c6c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,816 +1,822 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2026 juice094 -use clap::{Parser, Subcommand}; - -use devbase::*; - -mod commands; - -#[derive(Parser)] -#[command(name = "devbase", version)] -#[command(about = "Developer workspace database and knowledge-base manager")] -struct Cli { - #[command(subcommand)] - command: Commands, -} - -#[derive(Subcommand)] -pub(crate) enum Commands { - /// Scan a directory for Git repositories and register them - Scan { - /// Directory to scan (defaults to workspace root) - #[arg(default_value = ".")] - path: String, - 
/// Register discovered repos into the database - #[arg(long)] - register: bool, - /// Output results as JSON - #[arg(long)] - json: bool, - }, - /// Check the health of registered repositories and the environment - Health { - /// Show detailed per-repo status - #[arg(long)] - detail: bool, - /// Maximum number of repos to display per page (0 = unlimited) - #[arg(long, default_value_t = 0)] - limit: usize, - /// Page number (1-based) - #[arg(long, default_value_t = 1)] - page: usize, - /// Output results as JSON - #[arg(long)] - json: bool, - }, - /// Show index status for registered repositories - Status { - /// Output results as JSON - #[arg(long)] - json: bool, - }, - /// Sync registered repositories with their upstream remotes - Sync { - /// Dry-run: show what would be updated without applying - #[arg(long)] - dry_run: bool, - /// Comma-separated list of tags to filter repositories (OR logic) - #[arg(long)] - filter_tags: Option, - /// Comma-separated list of repo IDs to exclude from sync - #[arg(long)] - exclude: Option, - /// Output results as JSON - #[arg(long)] - json: bool, - }, - /// Query the knowledge base - Query { - /// Query expression, e.g. 
"lang:rust stale:>30" - query: String, - /// Maximum number of results per page (0 = unlimited) - #[arg(long, default_value_t = 0)] - limit: usize, - /// Page number (1-based) - #[arg(long, default_value_t = 1)] - page: usize, - /// Output results as JSON - #[arg(long)] - json: bool, - }, - /// Index repository summaries and module structures - Index { - /// Specific path to index; if omitted, index all registered repos - #[arg(default_value = "")] - path: String, - /// Skip semantic embedding generation (symbols/calls still indexed) - #[arg(long)] - skip_embeddings: bool, - }, - /// Remove archive/backup entries from registry - Clean, - /// Tag a registered repository - Tag { - /// Repository ID - repo_id: String, - /// Comma-separated tags - tags: String, - }, - /// Update metadata (tier / workspace type) of a registered repository - Meta { - /// Repository ID - repo_id: String, - /// Data tier: public, cooperative, or private - #[arg(long)] - tier: Option, - /// Workspace type: git, openclaw, or generic - #[arg(long)] - workspace_type: Option, - }, - /// Launch interactive TUI - #[cfg(feature = "tui")] - Tui, - /// Run as an MCP server (stdio transport) - #[cfg(feature = "mcp")] - Mcp { - /// Comma-separated tool tiers to expose (stable,beta,experimental). - /// Defaults to all tiers if omitted. 
- #[arg(long)] - tools: Option, - }, - /// Start the background daemon for knowledge maintenance - Daemon { - /// Tick interval in seconds - #[arg(long)] - interval: Option, - }, - #[cfg(feature = "watch")] - /// Watch a directory for changes and schedule sync actions - Watch { - /// Directory to watch - #[arg(default_value = ".")] - path: String, - /// Watch duration in seconds - #[arg(long, default_value = "10")] - duration: u64, - }, - /// Sync vault notes with ai_context=true to Clarity SKILL.md format - SkillSync { - /// Output directory for generated SKILL.md files - #[arg(long, default_value = "skills")] - output: String, - /// Only sync notes matching specific tags (comma-separated) - #[arg(long)] - filter_tags: Option, - /// Preview mode: list what would be synced without writing files - #[arg(long)] - dry_run: bool, - }, - /// Push registered repositories to Syncthing as sync folders - SyncthingPush { - /// Syncthing REST API base URL - #[arg(long, default_value = "http://127.0.0.1:8384")] - api_url: String, - /// Syncthing API key (optional if no auth) - #[arg(long)] - api_key: Option, - /// Only push repos matching these tags (comma-separated, OR logic) - #[arg(long)] - filter_tags: Option, - /// Only push the repo associated with this experiment ID - #[arg(long)] - experiment: Option, - }, - /// Auto-discover relationships between registered repositories - #[cfg(feature = "tui")] - Discover, - /// Generate daily knowledge digest - Digest, - /// Generate knowledge coverage report for the workspace or a repo - KnowledgeReport { - /// Specific repo ID; if omitted, reports on the entire workspace - #[arg(default_value = "")] - repo_id: String, - /// Number of recent activity events to include - #[arg(long, default_value_t = 20)] - activity_limit: usize, - /// Output as JSON - #[arg(long)] - json: bool, - }, - /// View the operation log - Oplog { - /// Limit number of entries (default: 20) - #[arg(long, default_value_t = 20)] - limit: i64, - /// Filter by 
repo ID - #[arg(long)] - repo: Option, - }, - /// Show code metrics for registered repositories - Metrics { - /// Specific repo ID; if omitted, shows all repos - #[arg(default_value = "")] - repo_id: String, - /// Output as JSON - #[arg(long)] - json: bool, - /// Recompute metrics from source (tokei) before showing - #[arg(long)] - recalc: bool, - }, - /// Show module graph for a repository - ModuleGraph { - /// Repository ID; if omitted, shows all Rust repos - #[arg(default_value = "")] - repo_id: String, - /// Output as JSON - #[arg(long)] - json: bool, - }, - /// Query the intra-repository call graph - CallGraph { - /// Repository ID to query - repo_id: String, - /// Called function name (who calls X) - #[arg(long)] - callee: Option, - /// Calling function name (what does Y call) - #[arg(long)] - caller: Option, - /// Optional file path substring to narrow scope - #[arg(long)] - file: Option, - /// Maximum results - #[arg(long, default_value_t = 50)] - limit: usize, - /// Output as JSON - #[arg(long)] - json: bool, - }, - /// Query cross-repository dependency graph - DependencyGraph { - /// Repository ID to query - repo_id: String, - /// Direction: outgoing (this repo depends on) or incoming (depends on this repo) - #[arg(long, default_value = "outgoing")] - direction: String, - /// Filter by relation type - #[arg(long)] - relation_type: Option, - /// Output as JSON - #[arg(long)] - json: bool, - }, - /// Query code symbols for a repository - CodeSymbols { - /// Repository ID to query - repo_id: String, - /// Optional symbol name substring filter - #[arg(long)] - name: Option, - /// Optional symbol type filter (function, struct, enum, trait, impl, module, type_alias, constant, static) - #[arg(long)] - symbol_type: Option, - /// Optional file path substring filter - #[arg(long)] - file: Option, - /// Maximum results - #[arg(long, default_value_t = 50)] - limit: usize, - /// Output as JSON - #[arg(long)] - json: bool, - }, - /// Find potentially dead (unused) 
functions in a repository - DeadCode { - /// Repository ID to analyze - repo_id: String, - /// Also report `pub fn` items - #[arg(long)] - include_pub: bool, - /// Maximum results - #[arg(long, default_value_t = 50)] - limit: usize, - /// Output as JSON - #[arg(long)] - json: bool, - }, - /// Fetch GitHub metadata for a registered repository - GithubInfo { - /// Repository ID - repo_id: String, - /// Write GitHub description into repo summary - #[arg(long)] - write_summary: bool, - /// Output as JSON - #[arg(long)] - json: bool, - }, - /// Registry backup and restore operations - Registry { - #[command(subcommand)] - cmd: RegistryCommands, - }, - /// Vault note management - Vault { - #[command(subcommand)] - cmd: VaultCommands, - }, - /// Skill Runtime — install, discover, and execute AI skills - Skill { - #[command(subcommand)] - cmd: SkillCommands, - }, - /// Workflow Engine — orchestrate multi-Skill pipelines - Workflow { - #[command(subcommand)] - cmd: WorkflowCommands, - }, - /// Manage known system limits (L3 risk layer) - Limit { - #[command(subcommand)] - cmd: LimitCommands, - }, - /// Repository management — list repos and check git health - Repo { - #[command(subcommand)] - cmd: RepoCommands, - }, - /// Show version information - Version, -} - -#[derive(Subcommand)] -pub(crate) enum RepoCommands { - /// List all registered repositories - List { - /// Output as JSON - #[arg(long)] - json: bool, - }, - /// Show git health status (ahead/behind/dirty/managed) for all repos - Status { - /// Output as JSON - #[arg(long)] - json: bool, - }, -} - -#[derive(Subcommand)] -pub(crate) enum SkillCommands { - /// List installed skills - List { - /// Filter by skill type (builtin, custom, system) - #[arg(long)] - skill_type: Option, - /// Filter by category - #[arg(long)] - category: Option, - /// Output as JSON - #[arg(long)] - json: bool, - }, - /// Install a skill from a local path or Git URL - Install { - /// Path to the skill directory or Git URL (must contain 
SKILL.md) - source: String, - /// Force treating source as a Git URL - #[arg(long)] - git: bool, - }, - /// Uninstall a skill - Uninstall { - /// Skill ID to remove - skill_id: String, - }, - /// Show skill details - Info { - /// Skill ID - skill_id: String, - /// Output as JSON - #[arg(long)] - json: bool, - }, - /// Search skills by name or description - Search { - /// Query string - query: String, - /// Use semantic search (requires embeddings) - #[arg(long)] - semantic: bool, - /// Filter by category - #[arg(long)] - category: Option, - /// Maximum results - #[arg(long, default_value_t = 10)] - limit: usize, - /// Output as JSON - #[arg(long)] - json: bool, - }, - /// Execute a skill - Run { - /// Skill ID - skill_id: String, - /// Arguments as key=value pairs - #[arg(long = "arg")] - args: Vec, - /// Timeout in seconds - #[arg(long, default_value_t = 30)] - timeout: u64, - /// Output full result as JSON - #[arg(long)] - json: bool, - }, - /// Validate a local SKILL.md file - Validate { - /// Path to SKILL.md or skill directory - path: String, - }, - /// Validate and prepare a skill for publishing - Publish { - /// Path to skill directory (default: current directory) - #[arg(default_value = ".")] - path: String, - /// Dry-run: validate without creating tag - #[arg(long)] - dry_run: bool, - }, - /// Sync skills to a plans directory (generic output, was clarity-only) - Sync { - /// Output directory for generated plan JSON files - output_dir: String, - }, - /// Discover and auto-package a project as a Skill - Discover { - /// Path to the project directory (or Git URL) - path: String, - /// Explicit skill ID (defaults to project name) - #[arg(long)] - skill_id: Option, - /// Dry-run: print generated files without installing - #[arg(long)] - dry_run: bool, - /// Output as JSON - #[arg(long)] - json: bool, - }, - /// Recalculate skill scores from execution history - RecalcScores, - /// Show top-rated skills - Top { - /// Maximum results - #[arg(long, default_value_t 
= 10)] - limit: usize, - }, - /// Recommend skills based on execution scores - Recommend { - /// Filter by category - #[arg(long)] - category: Option, - /// Maximum results - #[arg(long, default_value_t = 5)] - limit: usize, - }, - /// Import skills from external sources (GitHub repos or local directories) - Import { - /// Source URL or path (GitHub URL or local directory) - source: String, - /// Path within the source to scan for SKILL.md files - #[arg(long)] - source_path: Option, - /// Dry-run: list discovered skills without installing - #[arg(long)] - dry_run: bool, - /// Output as JSON - #[arg(long)] - json: bool, - }, -} - -#[derive(Subcommand)] -pub(crate) enum WorkflowCommands { - /// List registered workflows - List { - /// Output as JSON - #[arg(long)] - json: bool, - }, - /// Show workflow definition - Show { - /// Workflow ID - workflow_id: String, - }, - /// Register a workflow from a YAML file - Register { - /// Path to workflow.yaml - path: String, - }, - /// Run a workflow - Run { - /// Workflow ID - workflow_id: String, - /// Workflow inputs as key=value pairs - #[arg(long = "input")] - inputs: Vec, - }, - /// Delete a workflow - Delete { - /// Workflow ID - workflow_id: String, - }, -} - -#[derive(Subcommand)] -pub(crate) enum VaultCommands { - /// Scan a directory for Markdown notes and sync into the vault - Scan { - /// Directory to scan (defaults to default vault dir) - #[arg(default_value = "")] - path: String, - }, - /// Rebuild the Tantivy search index for all vault notes - Reindex, - /// List all vault notes - List { - /// Filter by tag - #[arg(short, long)] - tag: Option, - }, - /// Read a vault note by its relative path (e.g. 
"99-Meta/devbase-essence.md") - Read { - /// Relative path of the note within the vault - path: String, - }, - /// Write or overwrite a vault note - Write { - /// Relative path of the note within the vault - path: String, - /// Note content (use "-" to read from stdin) - #[arg(short, long)] - content: Option, - /// Title (optional, defaults to filename) - #[arg(short, long)] - title: Option, - }, - /// Search vault notes by keyword - Search { - /// Search query - query: String, - /// Maximum results - #[arg(short, long, default_value_t = 20)] - limit: usize, - }, - /// Export vault notes to a directory with integrity validation - Export { - /// Output directory for the exported vault - #[arg(default_value = "")] - output_dir: String, - }, - /// Show Git revision history for a vault note - History { - /// Relative path of the note within the vault - path: String, - }, -} - -#[derive(Subcommand)] -pub(crate) enum LimitCommands { - /// Add or update a known limit - Add { - /// Unique identifier (kebab-case recommended) - id: String, - /// Category: hard-veto, known-bug, external-dep - #[arg(long, default_value = "known-bug")] - category: String, - /// Description of the limit - #[arg(long)] - description: Option, - /// Source reference (e.g., AGENTS.md, oplog) - #[arg(long)] - source: Option, - /// Severity 1-5 - #[arg(long)] - severity: Option, - }, - /// List known limits - List { - /// Filter by category - #[arg(long)] - category: Option, - /// Filter by mitigated status - #[arg(long)] - mitigated: Option, - /// Output as JSON - #[arg(long)] - json: bool, - }, - /// Resolve (mitigate) a known limit - Resolve { - /// Limit ID - id: String, - /// Reason for resolution (optional, stored in L4 metacognition layer) - #[arg(long)] - reason: Option, - }, - /// Delete a known limit - Delete { - /// Limit ID - id: String, - }, - /// Seed hard vetoes from AGENTS.md into the registry - Seed, -} - -#[derive(Subcommand)] -pub(crate) enum RegistryCommands { - /// Export registry 
to a backup file - Export { - /// Output format: sqlite or json - #[arg(long, default_value = "sqlite")] - format: String, - /// Output file path (optional, defaults to backup dir with timestamp) - #[arg(long)] - output: Option, - }, - /// Import registry from a backup SQLite file - Import { - /// Source backup file path - path: String, - /// Skip dry-run and execute immediately - #[arg(long)] - yes: bool, - }, - /// List existing registry backups - Backups, - /// Clean old backups, keeping only the most recent ones - Clean, -} - -#[tokio::main] -async fn main() -> anyhow::Result<()> { - tracing_subscriber::fmt() - .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) - .init(); - - let mut ctx = storage::AppContext::with_defaults()?; - let _lang = if ctx.config.general.language == "auto" || ctx.config.general.language.is_empty() { - let detected = i18n::detect_system_language(); - ctx.config.general.language = detected.clone(); - if let Err(e) = ctx.config.save() { - eprintln!("警告: 无法保存语言配置: {}", e); - } - detected - } else { - ctx.config.general.language.clone() - }; - // i18n is now initialized via AppContext, no global state needed - - let cli = Cli::parse(); - - match cli.command { - Commands::Scan { path, register, json } => { - commands::simple::run_scan(&mut ctx, &path, register, json).await?; - } - Commands::Health { detail, limit, page, json } => { - commands::simple::run_health(&mut ctx, detail, limit, page, json).await?; - } - Commands::Status { json } => { - commands::simple::run_status(&mut ctx, json).await?; - } - Commands::Repo { cmd } => { - commands::simple::run_repo(&mut ctx, cmd).await?; - } - Commands::Sync { - dry_run, - filter_tags, - exclude, - json, - } => { - commands::simple::run_sync(&mut ctx, dry_run, filter_tags, exclude, json).await?; - } - Commands::Query { query, limit, page, json } => { - commands::simple::run_query(&mut ctx, &query, limit, page, json).await?; - } - Commands::Index { path, skip_embeddings } => { - 
commands::simple::run_index(&mut ctx, &path, skip_embeddings).await?; - } - Commands::Clean => { - commands::simple::run_clean(&mut ctx)?; - } - Commands::Tag { repo_id, tags } => { - commands::simple::run_tag(&mut ctx, &repo_id, &tags)?; - } - Commands::Meta { repo_id, tier, workspace_type } => { - commands::simple::run_meta(&mut ctx, &repo_id, tier, workspace_type)?; - } - #[cfg(feature = "tui")] - Commands::Tui => { - commands::simple::run_tui(&mut ctx).await?; - } - #[cfg(feature = "mcp")] - Commands::Mcp { tools } => { - commands::simple::run_mcp(&mut ctx, tools).await?; - } - Commands::Daemon { interval } => { - commands::simple::run_daemon(&mut ctx, interval).await?; - } - #[cfg(feature = "watch")] - Commands::Watch { path, duration } => { - commands::simple::run_watch(&mut ctx, &path, duration).await?; - } - Commands::SkillSync { output, filter_tags, dry_run } => { - commands::simple::run_skill_sync(&mut ctx, &output, filter_tags, dry_run)?; - } - Commands::SyncthingPush { - api_url, - api_key, - filter_tags, - experiment, - } => { - commands::simple::run_syncthing_push( - &mut ctx, - api_url, - api_key, - filter_tags, - experiment, - ) - .await?; - } - Commands::Digest => { - commands::simple::run_digest(&mut ctx).await?; - } - Commands::Oplog { limit, repo } => { - commands::simple::run_oplog(&mut ctx, limit, repo)?; - } - Commands::Metrics { repo_id, json, recalc } => { - commands::simple::run_metrics(&mut ctx, &repo_id, json, recalc)?; - } - Commands::ModuleGraph { repo_id, json } => { - commands::simple::run_module_graph(&mut ctx, &repo_id, json)?; - } - Commands::CallGraph { - repo_id, - callee, - caller, - file, - limit, - json, - } => { - commands::simple::run_call_graph( - &mut ctx, &repo_id, callee, caller, file, limit, json, - )?; - } - Commands::DependencyGraph { - repo_id, - direction, - relation_type, - json, - } => { - commands::simple::run_dependency_graph( - &mut ctx, - &repo_id, - &direction, - relation_type, - json, - )?; - } - 
Commands::CodeSymbols { - repo_id, - name, - symbol_type, - file, - limit, - json, - } => { - commands::simple::run_code_symbols( - &mut ctx, - &repo_id, - name, - symbol_type, - file, - limit, - json, - )?; - } - Commands::DeadCode { - repo_id, - include_pub, - limit, - json, - } => { - commands::simple::run_dead_code(&mut ctx, &repo_id, include_pub, limit, json)?; - } - Commands::GithubInfo { repo_id, write_summary, json } => { - commands::simple::run_github_info(&mut ctx, &repo_id, write_summary, json).await?; - } - #[cfg(feature = "tui")] - Commands::Discover => { - commands::simple::run_discover(&mut ctx)?; - } - Commands::Registry { cmd } => { - commands::simple::run_registry(&mut ctx, cmd)?; - } - Commands::Vault { cmd } => { - commands::simple::run_vault(&mut ctx, cmd).await?; - } - Commands::Skill { cmd } => { - commands::skill::run_skill(&mut ctx, cmd)?; - } - Commands::Workflow { cmd } => { - commands::workflow::run_workflow(&mut ctx, cmd)?; - } - Commands::KnowledgeReport { repo_id, activity_limit, json } => { - commands::simple::run_knowledge_report(&mut ctx, &repo_id, activity_limit, json)?; - } - Commands::Limit { cmd } => { - commands::limit::run_limit(&mut ctx, cmd)?; - } - Commands::Version => { - println!("devbase {}", env!("CARGO_PKG_VERSION")); - } - } - - Ok(()) -} +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 +use clap::{Parser, Subcommand}; + +use devbase::*; + +mod commands; + +#[derive(Parser)] +#[command(name = "devbase", version)] +#[command(about = "Developer workspace database and knowledge-base manager")] +struct Cli { + #[command(subcommand)] + command: Commands, +} + +#[derive(Subcommand)] +pub(crate) enum Commands { + /// Scan a directory for Git repositories and register them + Scan { + /// Directory to scan (defaults to workspace root) + #[arg(default_value = ".")] + path: String, + /// Register discovered repos into the database + #[arg(long)] + register: bool, + /// Output results as JSON + #[arg(long)] + 
json: bool, + }, + /// Check the health of registered repositories and the environment + Health { + /// Show detailed per-repo status + #[arg(long)] + detail: bool, + /// Maximum number of repos to display per page (0 = unlimited) + #[arg(long, default_value_t = 0)] + limit: usize, + /// Page number (1-based) + #[arg(long, default_value_t = 1)] + page: usize, + /// Output results as JSON + #[arg(long)] + json: bool, + }, + /// Show index status for registered repositories + Status { + /// Output results as JSON + #[arg(long)] + json: bool, + }, + /// Sync registered repositories with their upstream remotes + Sync { + /// Dry-run: show what would be updated without applying + #[arg(long)] + dry_run: bool, + /// Comma-separated list of tags to filter repositories (OR logic) + #[arg(long)] + filter_tags: Option, + /// Comma-separated list of repo IDs to exclude from sync + #[arg(long)] + exclude: Option, + /// Output results as JSON + #[arg(long)] + json: bool, + }, + /// Query the knowledge base + Query { + /// Query expression, e.g. 
"lang:rust stale:>30" + query: String, + /// Maximum number of results per page (0 = unlimited) + #[arg(long, default_value_t = 0)] + limit: usize, + /// Page number (1-based) + #[arg(long, default_value_t = 1)] + page: usize, + /// Output results as JSON + #[arg(long)] + json: bool, + }, + /// Index repository summaries and module structures + Index { + /// Specific path to index; if omitted, index all registered repos + #[arg(default_value = "")] + path: String, + /// Skip semantic embedding generation (symbols/calls still indexed) + #[arg(long)] + skip_embeddings: bool, + }, + /// Remove archive/backup entries from registry + Clean, + /// Tag a registered repository + Tag { + /// Repository ID + repo_id: String, + /// Comma-separated tags + tags: String, + }, + /// Update metadata (tier / workspace type) of a registered repository + Meta { + /// Repository ID + repo_id: String, + /// Data tier: public, cooperative, or private + #[arg(long)] + tier: Option, + /// Workspace type: git, openclaw, or generic + #[arg(long)] + workspace_type: Option, + }, + /// Launch interactive TUI + #[cfg(feature = "tui")] + Tui, + /// Run as an MCP server (stdio transport) + #[cfg(feature = "mcp")] + Mcp { + /// Comma-separated tool tiers to expose (stable,beta,experimental). + /// Defaults to all tiers if omitted. 
+ #[arg(long)] + tools: Option, + }, + /// Start the background daemon for knowledge maintenance + Daemon { + /// Tick interval in seconds + #[arg(long)] + interval: Option, + }, + #[cfg(feature = "watch")] + /// Watch a directory for changes and schedule sync actions + Watch { + /// Directory to watch + #[arg(default_value = ".")] + path: String, + /// Watch duration in seconds + #[arg(long, default_value = "10")] + duration: u64, + }, + /// Sync vault notes with ai_context=true to Clarity SKILL.md format + SkillSync { + /// Output directory for generated SKILL.md files + #[arg(long, default_value = "skills")] + output: String, + /// Only sync notes matching specific tags (comma-separated) + #[arg(long)] + filter_tags: Option, + /// Preview mode: list what would be synced without writing files + #[arg(long)] + dry_run: bool, + }, + /// Push registered repositories to Syncthing as sync folders + SyncthingPush { + /// Syncthing REST API base URL + #[arg(long, default_value = "http://127.0.0.1:8384")] + api_url: String, + /// Syncthing API key (optional if no auth) + #[arg(long)] + api_key: Option, + /// Only push repos matching these tags (comma-separated, OR logic) + #[arg(long)] + filter_tags: Option, + /// Only push the repo associated with this experiment ID + #[arg(long)] + experiment: Option, + }, + /// Auto-discover relationships between registered repositories + #[cfg(feature = "tui")] + Discover, + /// Generate daily knowledge digest + Digest, + /// Generate knowledge coverage report for the workspace or a repo + KnowledgeReport { + /// Specific repo ID; if omitted, reports on the entire workspace + #[arg(default_value = "")] + repo_id: String, + /// Number of recent activity events to include + #[arg(long, default_value_t = 20)] + activity_limit: usize, + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// View the operation log + Oplog { + /// Limit number of entries (default: 20) + #[arg(long, default_value_t = 20)] + limit: i64, + /// Filter by 
repo ID + #[arg(long)] + repo: Option, + }, + /// Show code metrics for registered repositories + Metrics { + /// Specific repo ID; if omitted, shows all repos + #[arg(default_value = "")] + repo_id: String, + /// Output as JSON + #[arg(long)] + json: bool, + /// Recompute metrics from source (tokei) before showing + #[arg(long)] + recalc: bool, + }, + /// Show module graph for a repository + ModuleGraph { + /// Repository ID; if omitted, shows all Rust repos + #[arg(default_value = "")] + repo_id: String, + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// Query the intra-repository call graph + CallGraph { + /// Repository ID to query + repo_id: String, + /// Called function name (who calls X) + #[arg(long)] + callee: Option, + /// Calling function name (what does Y call) + #[arg(long)] + caller: Option, + /// Optional file path substring to narrow scope + #[arg(long)] + file: Option, + /// Maximum results + #[arg(long, default_value_t = 50)] + limit: usize, + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// Query cross-repository dependency graph + DependencyGraph { + /// Repository ID to query + repo_id: String, + /// Direction: outgoing (this repo depends on) or incoming (depends on this repo) + #[arg(long, default_value = "outgoing")] + direction: String, + /// Filter by relation type + #[arg(long)] + relation_type: Option, + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// Query code symbols for a repository + CodeSymbols { + /// Repository ID to query + repo_id: String, + /// Optional symbol name substring filter + #[arg(long)] + name: Option, + /// Optional symbol type filter (function, struct, enum, trait, impl, module, type_alias, constant, static) + #[arg(long)] + symbol_type: Option, + /// Optional file path substring filter + #[arg(long)] + file: Option, + /// Maximum results + #[arg(long, default_value_t = 50)] + limit: usize, + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// Find potentially dead (unused) 
functions in a repository + DeadCode { + /// Repository ID to analyze + repo_id: String, + /// Also report `pub fn` items + #[arg(long)] + include_pub: bool, + /// Maximum results + #[arg(long, default_value_t = 50)] + limit: usize, + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// Fetch GitHub metadata for a registered repository + GithubInfo { + /// Repository ID + repo_id: String, + /// Write GitHub description into repo summary + #[arg(long)] + write_summary: bool, + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// Registry backup and restore operations + Registry { + #[command(subcommand)] + cmd: RegistryCommands, + }, + /// Vault note management + Vault { + #[command(subcommand)] + cmd: VaultCommands, + }, + /// Skill Runtime — install, discover, and execute AI skills + Skill { + #[command(subcommand)] + cmd: SkillCommands, + }, + /// Workflow Engine — orchestrate multi-Skill pipelines + Workflow { + #[command(subcommand)] + cmd: WorkflowCommands, + }, + /// Manage known system limits (L3 risk layer) + Limit { + #[command(subcommand)] + cmd: LimitCommands, + }, + /// Repository management — list repos and check git health + Repo { + #[command(subcommand)] + cmd: RepoCommands, + }, + /// Show version information + Version, +} + +#[derive(Subcommand)] +pub(crate) enum RepoCommands { + /// List all registered repositories + List { + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// Show git health status (ahead/behind/dirty/managed) for all repos + Status { + /// Output as JSON + #[arg(long)] + json: bool, + }, +} + +#[derive(Subcommand)] +pub(crate) enum SkillCommands { + /// List installed skills + List { + /// Filter by skill type (builtin, custom, system) + #[arg(long)] + skill_type: Option, + /// Filter by category + #[arg(long)] + category: Option, + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// Install a skill from a local path or Git URL + Install { + /// Path to the skill directory or Git URL (must contain 
SKILL.md) + source: String, + /// Force treating source as a Git URL + #[arg(long)] + git: bool, + }, + /// Uninstall a skill + Uninstall { + /// Skill ID to remove + skill_id: String, + }, + /// Show skill details + Info { + /// Skill ID + skill_id: String, + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// Search skills by name or description + Search { + /// Query string + query: String, + /// Use semantic search (requires embeddings) + #[arg(long)] + semantic: bool, + /// Filter by category + #[arg(long)] + category: Option, + /// Maximum results + #[arg(long, default_value_t = 10)] + limit: usize, + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// Execute a skill + Run { + /// Skill ID + skill_id: String, + /// Arguments as key=value pairs + #[arg(long = "arg")] + args: Vec, + /// Timeout in seconds + #[arg(long, default_value_t = 30)] + timeout: u64, + /// Output full result as JSON + #[arg(long)] + json: bool, + }, + /// Validate a local SKILL.md file + Validate { + /// Path to SKILL.md or skill directory + path: String, + }, + /// Validate and prepare a skill for publishing + Publish { + /// Path to skill directory (default: current directory) + #[arg(default_value = ".")] + path: String, + /// Dry-run: validate without creating tag + #[arg(long)] + dry_run: bool, + }, + /// Sync skills to a plans directory (generic output, was clarity-only) + Sync { + /// Output directory for generated plan JSON files + output_dir: String, + }, + /// Discover and auto-package a project as a Skill + Discover { + /// Path to the project directory (or Git URL) + path: String, + /// Explicit skill ID (defaults to project name) + #[arg(long)] + skill_id: Option, + /// Dry-run: print generated files without installing + #[arg(long)] + dry_run: bool, + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// Recalculate skill scores from execution history + RecalcScores, + /// Show top-rated skills + Top { + /// Maximum results + #[arg(long, default_value_t 
= 10)] + limit: usize, + }, + /// Recommend skills based on execution scores + Recommend { + /// Filter by category + #[arg(long)] + category: Option, + /// Maximum results + #[arg(long, default_value_t = 5)] + limit: usize, + }, + /// Import skills from external sources (GitHub repos or local directories) + Import { + /// Source URL or path (GitHub URL or local directory) + source: String, + /// Path within the source to scan for SKILL.md files + #[arg(long)] + source_path: Option, + /// Dry-run: list discovered skills without installing + #[arg(long)] + dry_run: bool, + /// Output as JSON + #[arg(long)] + json: bool, + }, +} + +#[derive(Subcommand)] +pub(crate) enum WorkflowCommands { + /// List registered workflows + List { + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// Show workflow definition + Show { + /// Workflow ID + workflow_id: String, + }, + /// Register a workflow from a YAML file + Register { + /// Path to workflow.yaml + path: String, + }, + /// Run a workflow + Run { + /// Workflow ID + workflow_id: String, + /// Workflow inputs as key=value pairs + #[arg(long = "input")] + inputs: Vec, + }, + /// Delete a workflow + Delete { + /// Workflow ID + workflow_id: String, + }, +} + +#[derive(Subcommand)] +pub(crate) enum VaultCommands { + /// Scan a directory for Markdown notes and sync into the vault + Scan { + /// Directory to scan (defaults to default vault dir) + #[arg(default_value = "")] + path: String, + }, + /// Rebuild the Tantivy search index for all vault notes + Reindex, + /// List all vault notes + List { + /// Filter by tag + #[arg(short, long)] + tag: Option, + }, + /// Read a vault note by its relative path (e.g. 
"99-Meta/devbase-essence.md") + Read { + /// Relative path of the note within the vault + path: String, + }, + /// Write or overwrite a vault note + Write { + /// Relative path of the note within the vault + path: String, + /// Note content (use "-" to read from stdin) + #[arg(short, long)] + content: Option, + /// Title (optional, defaults to filename) + #[arg(short, long)] + title: Option, + }, + /// Search vault notes by keyword + Search { + /// Search query + query: String, + /// Maximum results + #[arg(short, long, default_value_t = 20)] + limit: usize, + }, + /// Export vault notes to a directory with integrity validation + Export { + /// Output directory for the exported vault + #[arg(default_value = "")] + output_dir: String, + }, + /// Show Git revision history for a vault note + History { + /// Relative path of the note within the vault + path: String, + }, + /// Sync all configured vault roots (scan + incremental update) + Sync { + /// Perform a full rescan instead of incremental + #[arg(long)] + full: bool, + }, +} + +#[derive(Subcommand)] +pub(crate) enum LimitCommands { + /// Add or update a known limit + Add { + /// Unique identifier (kebab-case recommended) + id: String, + /// Category: hard-veto, known-bug, external-dep + #[arg(long, default_value = "known-bug")] + category: String, + /// Description of the limit + #[arg(long)] + description: Option, + /// Source reference (e.g., AGENTS.md, oplog) + #[arg(long)] + source: Option, + /// Severity 1-5 + #[arg(long)] + severity: Option, + }, + /// List known limits + List { + /// Filter by category + #[arg(long)] + category: Option, + /// Filter by mitigated status + #[arg(long)] + mitigated: Option, + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// Resolve (mitigate) a known limit + Resolve { + /// Limit ID + id: String, + /// Reason for resolution (optional, stored in L4 metacognition layer) + #[arg(long)] + reason: Option, + }, + /// Delete a known limit + Delete { + /// Limit ID + id: 
String, + }, + /// Seed hard vetoes from AGENTS.md into the registry + Seed, +} + +#[derive(Subcommand)] +pub(crate) enum RegistryCommands { + /// Export registry to a backup file + Export { + /// Output format: sqlite or json + #[arg(long, default_value = "sqlite")] + format: String, + /// Output file path (optional, defaults to backup dir with timestamp) + #[arg(long)] + output: Option, + }, + /// Import registry from a backup SQLite file + Import { + /// Source backup file path + path: String, + /// Skip dry-run and execute immediately + #[arg(long)] + yes: bool, + }, + /// List existing registry backups + Backups, + /// Clean old backups, keeping only the most recent ones + Clean, +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + tracing_subscriber::fmt() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .init(); + + let mut ctx = storage::AppContext::with_defaults()?; + let _lang = if ctx.config.general.language == "auto" || ctx.config.general.language.is_empty() { + let detected = i18n::detect_system_language(); + ctx.config.general.language = detected.clone(); + if let Err(e) = ctx.config.save() { + eprintln!("警告: 无法保存语言配置: {}", e); + } + detected + } else { + ctx.config.general.language.clone() + }; + // i18n is now initialized via AppContext, no global state needed + + let cli = Cli::parse(); + + match cli.command { + Commands::Scan { path, register, json } => { + commands::simple::run_scan(&mut ctx, &path, register, json).await?; + } + Commands::Health { detail, limit, page, json } => { + commands::simple::run_health(&mut ctx, detail, limit, page, json).await?; + } + Commands::Status { json } => { + commands::simple::run_status(&mut ctx, json).await?; + } + Commands::Repo { cmd } => { + commands::simple::run_repo(&mut ctx, cmd).await?; + } + Commands::Sync { + dry_run, + filter_tags, + exclude, + json, + } => { + commands::simple::run_sync(&mut ctx, dry_run, filter_tags, exclude, json).await?; + } + Commands::Query { 
query, limit, page, json } => { + commands::simple::run_query(&mut ctx, &query, limit, page, json).await?; + } + Commands::Index { path, skip_embeddings } => { + commands::simple::run_index(&mut ctx, &path, skip_embeddings).await?; + } + Commands::Clean => { + commands::simple::run_clean(&mut ctx)?; + } + Commands::Tag { repo_id, tags } => { + commands::simple::run_tag(&mut ctx, &repo_id, &tags)?; + } + Commands::Meta { repo_id, tier, workspace_type } => { + commands::simple::run_meta(&mut ctx, &repo_id, tier, workspace_type)?; + } + #[cfg(feature = "tui")] + Commands::Tui => { + commands::simple::run_tui(&mut ctx).await?; + } + #[cfg(feature = "mcp")] + Commands::Mcp { tools } => { + commands::simple::run_mcp(&mut ctx, tools).await?; + } + Commands::Daemon { interval } => { + commands::simple::run_daemon(&mut ctx, interval).await?; + } + #[cfg(feature = "watch")] + Commands::Watch { path, duration } => { + commands::simple::run_watch(&mut ctx, &path, duration).await?; + } + Commands::SkillSync { output, filter_tags, dry_run } => { + commands::simple::run_skill_sync(&mut ctx, &output, filter_tags, dry_run)?; + } + Commands::SyncthingPush { + api_url, + api_key, + filter_tags, + experiment, + } => { + commands::simple::run_syncthing_push( + &mut ctx, + api_url, + api_key, + filter_tags, + experiment, + ) + .await?; + } + Commands::Digest => { + commands::simple::run_digest(&mut ctx).await?; + } + Commands::Oplog { limit, repo } => { + commands::simple::run_oplog(&mut ctx, limit, repo)?; + } + Commands::Metrics { repo_id, json, recalc } => { + commands::simple::run_metrics(&mut ctx, &repo_id, json, recalc)?; + } + Commands::ModuleGraph { repo_id, json } => { + commands::simple::run_module_graph(&mut ctx, &repo_id, json)?; + } + Commands::CallGraph { + repo_id, + callee, + caller, + file, + limit, + json, + } => { + commands::simple::run_call_graph( + &mut ctx, &repo_id, callee, caller, file, limit, json, + )?; + } + Commands::DependencyGraph { + repo_id, + direction, 
+ relation_type, + json, + } => { + commands::simple::run_dependency_graph( + &mut ctx, + &repo_id, + &direction, + relation_type, + json, + )?; + } + Commands::CodeSymbols { + repo_id, + name, + symbol_type, + file, + limit, + json, + } => { + commands::simple::run_code_symbols( + &mut ctx, + &repo_id, + name, + symbol_type, + file, + limit, + json, + )?; + } + Commands::DeadCode { + repo_id, + include_pub, + limit, + json, + } => { + commands::simple::run_dead_code(&mut ctx, &repo_id, include_pub, limit, json)?; + } + Commands::GithubInfo { repo_id, write_summary, json } => { + commands::simple::run_github_info(&mut ctx, &repo_id, write_summary, json).await?; + } + #[cfg(feature = "tui")] + Commands::Discover => { + commands::simple::run_discover(&mut ctx)?; + } + Commands::Registry { cmd } => { + commands::simple::run_registry(&mut ctx, cmd)?; + } + Commands::Vault { cmd } => { + commands::simple::run_vault(&mut ctx, cmd).await?; + } + Commands::Skill { cmd } => { + commands::skill::run_skill(&mut ctx, cmd)?; + } + Commands::Workflow { cmd } => { + commands::workflow::run_workflow(&mut ctx, cmd)?; + } + Commands::KnowledgeReport { repo_id, activity_limit, json } => { + commands::simple::run_knowledge_report(&mut ctx, &repo_id, activity_limit, json)?; + } + Commands::Limit { cmd } => { + commands::limit::run_limit(&mut ctx, cmd)?; + } + Commands::Version => { + println!("devbase {}", env!("CARGO_PKG_VERSION")); + } + } + + Ok(()) +} diff --git a/src/mcp/tools/vault.rs b/src/mcp/tools/vault.rs index e9689ce..b88b7c3 100644 --- a/src/mcp/tools/vault.rs +++ b/src/mcp/tools/vault.rs @@ -215,12 +215,7 @@ Returns: JSON with success status and the written file path."#, .context("Missing required argument: content")?; let append = args.get("append").and_then(|v| v.as_bool()).unwrap_or(false); - let vault_root = ctx - .storage - .workspace_dir() - .map(|ws| ws.join("vault")) - .unwrap_or_else(|_| std::path::PathBuf::from("vault")); - let target = resolve_vault_path(path, 
&vault_root)?; + let target = resolve_vault_write_path(ctx, path)?; if let Some(parent) = target.parent() { std::fs::create_dir_all(parent)?; @@ -241,7 +236,92 @@ Returns: JSON with success status and the written file path."#, } } +/// Resolve a vault write path by checking entities.local_path first, +/// then falling back to configured vault roots. +fn resolve_vault_write_path( + ctx: &crate::storage::AppContext, + path: &str, +) -> anyhow::Result { + // 1. Check if this note already exists in entities (has local_path) + if let Ok(conn) = ctx.conn() { + let local_path: Option = conn + .query_row( + "SELECT local_path FROM entities WHERE entity_type = ?1 AND (id = ?2 OR name = ?2)", + rusqlite::params![crate::registry::ENTITY_TYPE_VAULT_NOTE, path], + |row| row.get(0), + ) + .ok(); + if let Some(lp) = local_path { + let p = std::path::PathBuf::from(lp); + if p.exists() || p.parent().map(|d| d.exists()).unwrap_or(false) { + return Ok(p); + } + } + } + + // 2. Fall back to vault roots from config + let vault_roots = match crate::config::Config::load() { + Ok(cfg) if !cfg.vault.roots.is_empty() => { + cfg.vault.roots.iter().map(std::path::PathBuf::from).collect() + } + _ => { + vec![ctx + .storage + .workspace_dir() + .map(|ws| ws.join("vault")) + .unwrap_or_else(|_| std::path::PathBuf::from("vault"))] + } + }; + + let relative = std::path::Path::new(path); + for root in &vault_roots { + let target = resolve_vault_relative_path(relative, root)?; + if target.starts_with(root) { + return Ok(target); + } + } + + anyhow::bail!("Path '{}' cannot be resolved under any configured vault root", path) +} + +/// Resolve a relative path under a single vault root. 
+///
+/// The path is normalized lexically (`.` is dropped, `..` removes the previous
+/// component) and then joined onto `vault_root`. No filesystem access happens
+/// here, so symlinks are not resolved. A path that normalizes to nothing
+/// yields the vault root itself.
+///
+/// # Errors
+///
+/// Returns an error when `relative_path` is absolute or starts with `/` or
+/// `\`, when it contains a component other than a plain name, `.` or `..`,
+/// or when `..` would climb above the vault root.
+fn resolve_vault_relative_path(
+    relative_path: &std::path::Path,
+    vault_root: &std::path::Path,
+) -> anyhow::Result<std::path::PathBuf> {
+    if relative_path.is_absolute() {
+        anyhow::bail!("Absolute paths are not allowed in vault: {}", relative_path.display());
+    }
+    // `is_absolute` does not flag every rooted spelling (on Windows `\foo` is
+    // rooted but not absolute), so leading separators are rejected explicitly.
+    let s = relative_path.to_string_lossy();
+    if s.starts_with('/') || s.starts_with('\\') {
+        anyhow::bail!("Absolute paths are not allowed in vault: {}", relative_path.display());
+    }
+
+    let mut normalized = std::path::PathBuf::new();
+    for component in relative_path.components() {
+        match component {
+            std::path::Component::Normal(name) => normalized.push(name),
+            std::path::Component::CurDir => {}
+            std::path::Component::ParentDir => {
+                // `pop` returns false once nothing is left to remove, i.e. the
+                // path tries to step above the vault root.
+                if !normalized.pop() {
+                    anyhow::bail!("Path escapes vault root: {}", relative_path.display());
+                }
+            }
+            // Remaining variants are `Prefix` and `RootDir`.
+            _ => anyhow::bail!("Invalid path component in: {}", relative_path.display()),
+        }
+    }
+
+    // Defensive re-check: `normalized` only holds plain names at this point.
+    let target = vault_root.join(&normalized);
+    if !target.starts_with(vault_root) {
+        anyhow::bail!("Path escapes vault root: {}", relative_path.display());
+    }
+
+    Ok(target)
+}
+
 /// Resolve a vault-relative path, enforcing that it stays within the vault root.
+#[allow(dead_code)] fn resolve_vault_path( relative_path: &str, vault_root: &std::path::Path, @@ -379,7 +459,7 @@ Returns: JSON with success status and the generated file path."#, let digest = ctx.generate_daily_digest()?; let digest_str = digest.get("digest").and_then(|v| v.as_str()).unwrap_or(""); - let target = resolve_vault_path(&rel_path, &vault_root)?; + let target = resolve_vault_relative_path(std::path::Path::new(&rel_path), &vault_root)?; if let Some(parent) = target.parent() { std::fs::create_dir_all(parent)?; diff --git a/src/vault/mod.rs b/src/vault/mod.rs index cbfc0e0..7f108d0 100644 --- a/src/vault/mod.rs +++ b/src/vault/mod.rs @@ -1,287 +1,289 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2026 juice094 -pub mod backlinks; -pub mod export; -pub mod frontmatter; -pub mod fs_io; -pub mod history; -pub mod indexer; -pub mod scanner; -pub mod wikilink; - -use crate::storage::AppContext; - -impl crate::clients::VaultClient for AppContext { - fn list_vault_notes(&self) -> anyhow::Result { - let conn = self.conn()?; - let notes = crate::registry::vault::list_vault_notes(&conn)?; - let results: Vec = notes - .into_iter() - .map(|n| { - serde_json::json!({ - "id": n.id, - "path": n.path, - "title": n.title, - "tags": n.tags, - }) - }) - .collect(); - Ok(serde_json::json!({"success": true, "count": results.len(), "notes": results})) - } - - fn read_vault_note(&self, path: &str) -> anyhow::Result { - let (body, frontmatter) = fs_io::read_note_body(path) - .ok_or_else(|| anyhow::anyhow!("note not found or unreadable"))?; - Ok(serde_json::json!({ - "success": true, - "path": path, - "content": body, - "frontmatter": frontmatter, - })) - } - - fn get_backlinks(&self, note_id: &str) -> anyhow::Result { - let backlinks = match self.conn() { - Ok(conn) => match crate::registry::vault::list_vault_notes(&conn) { - Ok(notes) => notes - .into_iter() - .filter(|n| { - n.outgoing_links.iter().any(|l| { - let normalized = l.replace('\\', "/"); - normalized == 
note_id.replace('\\', "/") - || normalized - == note_id - .replace('\\', "/") - .strip_suffix(".md") - .unwrap_or(¬e_id.replace('\\', "/")) - || l == note_id - }) - }) - .map(|n| n.id.replace('\\', "/")) - .collect(), - Err(_) => Vec::new(), - }, - Err(_) => Vec::new(), - }; - Ok(serde_json::json!({ - "success": true, - "target": note_id, - "count": backlinks.len(), - "backlinks": backlinks, - })) - } - - fn build_vault_graph( - &self, - repo_id: Option<&str>, - note_id: Option<&str>, - depth: usize, - ) -> anyhow::Result { - let conn = match self.conn() { - Ok(c) => c, - Err(_) => { - return Ok(serde_json::json!({ - "success": true, - "count": 0, - "edge_count": 0, - "nodes": [], - "edges": [], - })); - } - }; - - let notes = match crate::registry::vault::list_vault_notes(&conn) { - Ok(n) => n, - Err(_) => { - return Ok(serde_json::json!({ - "success": true, - "count": 0, - "edge_count": 0, - "nodes": [], - "edges": [], - })); - } - }; - - if notes.is_empty() { - return Ok(serde_json::json!({ - "success": true, - "count": 0, - "edge_count": 0, - "nodes": [], - "edges": [], - })); - } - - let mut id_to_title: std::collections::HashMap = - std::collections::HashMap::new(); - let mut id_to_repo: std::collections::HashMap = - std::collections::HashMap::new(); - let mut outgoing: std::collections::HashMap> = - std::collections::HashMap::new(); - let mut incoming: std::collections::HashMap> = - std::collections::HashMap::new(); - - for note in ¬es { - let id = note.id.replace('\\', "/"); - id_to_title.insert(id.clone(), note.title.clone().unwrap_or_else(|| id.clone())); - if let Some(ref r) = note.linked_repo { - id_to_repo.insert(id.clone(), r.clone()); - } - - let targets: Vec = - note.outgoing_links.iter().map(|t| t.replace('\\', "/")).collect(); - outgoing.insert(id.clone(), targets.clone()); - - for target in targets { - incoming.entry(target.clone()).or_default().push(id.clone()); - if let Some(stem) = target.strip_suffix(".md") { - 
incoming.entry(stem.to_string()).or_default().push(id.clone()); - } - } - } - - let mut id_lookup: std::collections::HashMap = - std::collections::HashMap::new(); - for id in id_to_title.keys() { - id_lookup.insert(id.clone(), id.clone()); - if let Some(stem) = id.strip_suffix(".md") { - id_lookup.insert(stem.to_string(), id.clone()); - } - } - - let allowed_ids: std::collections::HashSet = if let Some(rid) = repo_id { - id_to_repo.iter().filter(|(_, r)| *r == rid).map(|(id, _)| id.clone()).collect() - } else { - id_to_title.keys().cloned().collect() - }; - - let max_depth = depth.clamp(1, 3); - - let (selected_nodes, selected_edges): ( - std::collections::HashSet, - Vec<(String, String)>, - ) = if let Some(start_id) = note_id { - let start_normalized = - id_lookup.get(start_id).cloned().unwrap_or_else(|| start_id.replace('\\', "/")); - if !allowed_ids.contains(&start_normalized) { - return Ok(serde_json::json!({ - "success": true, - "count": 1, - "edge_count": 0, - "nodes": [serde_json::json!({ - "id": start_normalized, - "title": id_to_title.get(&start_normalized).unwrap_or(&start_normalized), - })], - "edges": [], - })); - } - - let mut visited: std::collections::HashSet = std::collections::HashSet::new(); - let mut edges: Vec<(String, String)> = Vec::new(); - let mut queue: Vec<(String, usize)> = vec![(start_normalized.clone(), 0)]; - visited.insert(start_normalized.clone()); - - while let Some((current, dist)) = queue.pop() { - if dist >= max_depth { - continue; - } - for target in outgoing.get(¤t).into_iter().flatten() { - let norm = id_lookup.get(target).cloned().unwrap_or_else(|| target.clone()); - if allowed_ids.contains(&norm) { - edges.push((current.clone(), norm.clone())); - if visited.insert(norm.clone()) { - queue.push((norm, dist + 1)); - } - } - } - for source in incoming.get(¤t).into_iter().flatten() { - let norm = id_lookup.get(source).cloned().unwrap_or_else(|| source.clone()); - if allowed_ids.contains(&norm) { - edges.push((norm.clone(), 
current.clone())); - if visited.insert(norm.clone()) { - queue.push((norm, dist + 1)); - } - } - } - } - - (visited, edges) - } else { - let mut all_edges: Vec<(String, String)> = Vec::new(); - for (source, targets) in &outgoing { - if !allowed_ids.contains(source) { - continue; - } - for target in targets { - let norm = id_lookup.get(target).cloned().unwrap_or_else(|| target.clone()); - if allowed_ids.contains(&norm) { - all_edges.push((source.clone(), norm.clone())); - } - } - } - (allowed_ids.clone(), all_edges) - }; - - let nodes: Vec<_> = selected_nodes - .iter() - .map(|id| { - serde_json::json!({ - "id": id, - "title": id_to_title.get(id).unwrap_or(id), - }) - }) - .collect(); - - let edges_json: Vec<_> = selected_edges - .iter() - .map(|(s, t)| serde_json::json!({ "source": s, "target": t })) - .collect(); - - Ok(serde_json::json!({ - "success": true, - "count": nodes.len(), - "edge_count": edges_json.len(), - "nodes": nodes, - "edges": edges_json, - })) - } - - fn get_vault_history(&self, note_id: &str) -> anyhow::Result { - let vault_dir = self.storage.workspace_dir().ok().map(|ws| ws.join("vault")); - let history = if let Some(ref vd) = vault_dir { - crate::vault::history::note_history(vd, note_id).unwrap_or_default() - } else { - Vec::new() - }; - let entries: Vec = history - .into_iter() - .map(|h| { - let ts = chrono::DateTime::from_timestamp(h.timestamp, 0) - .map(|dt| dt.format("%Y-%m-%d %H:%M:%S").to_string()) - .unwrap_or_else(|| "unknown".to_string()); - serde_json::json!({ - "commit": h.commit, - "author": h.author, - "email": h.email, - "timestamp": ts, - "message": h.message, - "insertions": h.insertions, - "deletions": h.deletions, - }) - }) - .collect(); - Ok(serde_json::json!({ - "success": true, - "note_id": note_id, - "count": entries.len(), - "history": entries, - })) - } - - fn export_vault(&self, output_dir: &str) -> anyhow::Result { - let vault_dir = self.storage.workspace_dir()?.join("vault"); - let out = 
std::path::PathBuf::from(output_dir); - crate::vault::export::export_vault(&vault_dir, &out) - } -} +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 +pub mod backlinks; +pub mod export; +pub mod frontmatter; +pub mod fs_io; +pub mod history; +pub mod indexer; +pub mod scanner; +pub mod wikilink; + +pub use scanner::ScanOptions; + +use crate::storage::AppContext; + +impl crate::clients::VaultClient for AppContext { + fn list_vault_notes(&self) -> anyhow::Result { + let conn = self.conn()?; + let notes = crate::registry::vault::list_vault_notes(&conn)?; + let results: Vec = notes + .into_iter() + .map(|n| { + serde_json::json!({ + "id": n.id, + "path": n.path, + "title": n.title, + "tags": n.tags, + }) + }) + .collect(); + Ok(serde_json::json!({"success": true, "count": results.len(), "notes": results})) + } + + fn read_vault_note(&self, path: &str) -> anyhow::Result { + let (body, frontmatter) = fs_io::read_note_body(path) + .ok_or_else(|| anyhow::anyhow!("note not found or unreadable"))?; + Ok(serde_json::json!({ + "success": true, + "path": path, + "content": body, + "frontmatter": frontmatter, + })) + } + + fn get_backlinks(&self, note_id: &str) -> anyhow::Result { + let backlinks = match self.conn() { + Ok(conn) => match crate::registry::vault::list_vault_notes(&conn) { + Ok(notes) => notes + .into_iter() + .filter(|n| { + n.outgoing_links.iter().any(|l| { + let normalized = l.replace('\\', "/"); + normalized == note_id.replace('\\', "/") + || normalized + == note_id + .replace('\\', "/") + .strip_suffix(".md") + .unwrap_or(¬e_id.replace('\\', "/")) + || l == note_id + }) + }) + .map(|n| n.id.replace('\\', "/")) + .collect(), + Err(_) => Vec::new(), + }, + Err(_) => Vec::new(), + }; + Ok(serde_json::json!({ + "success": true, + "target": note_id, + "count": backlinks.len(), + "backlinks": backlinks, + })) + } + + fn build_vault_graph( + &self, + repo_id: Option<&str>, + note_id: Option<&str>, + depth: usize, + ) -> anyhow::Result { + let conn = 
match self.conn() { + Ok(c) => c, + Err(_) => { + return Ok(serde_json::json!({ + "success": true, + "count": 0, + "edge_count": 0, + "nodes": [], + "edges": [], + })); + } + }; + + let notes = match crate::registry::vault::list_vault_notes(&conn) { + Ok(n) => n, + Err(_) => { + return Ok(serde_json::json!({ + "success": true, + "count": 0, + "edge_count": 0, + "nodes": [], + "edges": [], + })); + } + }; + + if notes.is_empty() { + return Ok(serde_json::json!({ + "success": true, + "count": 0, + "edge_count": 0, + "nodes": [], + "edges": [], + })); + } + + let mut id_to_title: std::collections::HashMap = + std::collections::HashMap::new(); + let mut id_to_repo: std::collections::HashMap = + std::collections::HashMap::new(); + let mut outgoing: std::collections::HashMap> = + std::collections::HashMap::new(); + let mut incoming: std::collections::HashMap> = + std::collections::HashMap::new(); + + for note in ¬es { + let id = note.id.replace('\\', "/"); + id_to_title.insert(id.clone(), note.title.clone().unwrap_or_else(|| id.clone())); + if let Some(ref r) = note.linked_repo { + id_to_repo.insert(id.clone(), r.clone()); + } + + let targets: Vec = + note.outgoing_links.iter().map(|t| t.replace('\\', "/")).collect(); + outgoing.insert(id.clone(), targets.clone()); + + for target in targets { + incoming.entry(target.clone()).or_default().push(id.clone()); + if let Some(stem) = target.strip_suffix(".md") { + incoming.entry(stem.to_string()).or_default().push(id.clone()); + } + } + } + + let mut id_lookup: std::collections::HashMap = + std::collections::HashMap::new(); + for id in id_to_title.keys() { + id_lookup.insert(id.clone(), id.clone()); + if let Some(stem) = id.strip_suffix(".md") { + id_lookup.insert(stem.to_string(), id.clone()); + } + } + + let allowed_ids: std::collections::HashSet = if let Some(rid) = repo_id { + id_to_repo.iter().filter(|(_, r)| *r == rid).map(|(id, _)| id.clone()).collect() + } else { + id_to_title.keys().cloned().collect() + }; + + let 
max_depth = depth.clamp(1, 3); + + let (selected_nodes, selected_edges): ( + std::collections::HashSet, + Vec<(String, String)>, + ) = if let Some(start_id) = note_id { + let start_normalized = + id_lookup.get(start_id).cloned().unwrap_or_else(|| start_id.replace('\\', "/")); + if !allowed_ids.contains(&start_normalized) { + return Ok(serde_json::json!({ + "success": true, + "count": 1, + "edge_count": 0, + "nodes": [serde_json::json!({ + "id": start_normalized, + "title": id_to_title.get(&start_normalized).unwrap_or(&start_normalized), + })], + "edges": [], + })); + } + + let mut visited: std::collections::HashSet = std::collections::HashSet::new(); + let mut edges: Vec<(String, String)> = Vec::new(); + let mut queue: Vec<(String, usize)> = vec![(start_normalized.clone(), 0)]; + visited.insert(start_normalized.clone()); + + while let Some((current, dist)) = queue.pop() { + if dist >= max_depth { + continue; + } + for target in outgoing.get(¤t).into_iter().flatten() { + let norm = id_lookup.get(target).cloned().unwrap_or_else(|| target.clone()); + if allowed_ids.contains(&norm) { + edges.push((current.clone(), norm.clone())); + if visited.insert(norm.clone()) { + queue.push((norm, dist + 1)); + } + } + } + for source in incoming.get(¤t).into_iter().flatten() { + let norm = id_lookup.get(source).cloned().unwrap_or_else(|| source.clone()); + if allowed_ids.contains(&norm) { + edges.push((norm.clone(), current.clone())); + if visited.insert(norm.clone()) { + queue.push((norm, dist + 1)); + } + } + } + } + + (visited, edges) + } else { + let mut all_edges: Vec<(String, String)> = Vec::new(); + for (source, targets) in &outgoing { + if !allowed_ids.contains(source) { + continue; + } + for target in targets { + let norm = id_lookup.get(target).cloned().unwrap_or_else(|| target.clone()); + if allowed_ids.contains(&norm) { + all_edges.push((source.clone(), norm.clone())); + } + } + } + (allowed_ids.clone(), all_edges) + }; + + let nodes: Vec<_> = selected_nodes + .iter() + 
.map(|id| { + serde_json::json!({ + "id": id, + "title": id_to_title.get(id).unwrap_or(id), + }) + }) + .collect(); + + let edges_json: Vec<_> = selected_edges + .iter() + .map(|(s, t)| serde_json::json!({ "source": s, "target": t })) + .collect(); + + Ok(serde_json::json!({ + "success": true, + "count": nodes.len(), + "edge_count": edges_json.len(), + "nodes": nodes, + "edges": edges_json, + })) + } + + fn get_vault_history(&self, note_id: &str) -> anyhow::Result { + let vault_dir = self.storage.workspace_dir().ok().map(|ws| ws.join("vault")); + let history = if let Some(ref vd) = vault_dir { + crate::vault::history::note_history(vd, note_id).unwrap_or_default() + } else { + Vec::new() + }; + let entries: Vec = history + .into_iter() + .map(|h| { + let ts = chrono::DateTime::from_timestamp(h.timestamp, 0) + .map(|dt| dt.format("%Y-%m-%d %H:%M:%S").to_string()) + .unwrap_or_else(|| "unknown".to_string()); + serde_json::json!({ + "commit": h.commit, + "author": h.author, + "email": h.email, + "timestamp": ts, + "message": h.message, + "insertions": h.insertions, + "deletions": h.deletions, + }) + }) + .collect(); + Ok(serde_json::json!({ + "success": true, + "note_id": note_id, + "count": entries.len(), + "history": entries, + })) + } + + fn export_vault(&self, output_dir: &str) -> anyhow::Result { + let vault_dir = self.storage.workspace_dir()?.join("vault"); + let out = std::path::PathBuf::from(output_dir); + crate::vault::export::export_vault(&vault_dir, &out) + } +} diff --git a/src/vault/scanner.rs b/src/vault/scanner.rs index 13b46e9..ba3c1a1 100644 --- a/src/vault/scanner.rs +++ b/src/vault/scanner.rs @@ -1,144 +1,187 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2026 juice094 -use crate::registry::VaultNote; -use crate::vault::frontmatter::extract_frontmatter; -use crate::vault::wikilink::extract_wikilinks; - -use chrono::Utc; -use std::path::{Path, PathBuf}; -use tracing::{info, warn}; - -fn default_vault_dir() -> anyhow::Result { - let ws = 
crate::registry::WorkspaceRegistry::workspace_dir()?; - let vault = ws.join("vault"); - // P1-2: PARA directory structure - for sub in &["00-Inbox", "01-Projects", "02-Areas", "03-Resources", "04-Archives", "99-Meta"] { - std::fs::create_dir_all(vault.join(sub))?; - } - Ok(vault) -} - -/// Scan a vault directory for Markdown notes and sync them into the registry. -/// -/// * `vault_dir` — root of the vault. If `None`, uses the default location. -/// * Returns the number of notes synced. -pub fn scan_vault( - conn: &mut rusqlite::Connection, - vault_dir: Option<&Path>, -) -> anyhow::Result { - let root = match vault_dir { - Some(p) => p.to_path_buf(), - None => default_vault_dir()?, - }; - - if !root.exists() { - info!("Vault directory does not exist yet: {:?}", root); - return Ok(0); - } - - let mut synced = 0; - - for entry in walkdir::WalkDir::new(&root) - .follow_links(false) - .into_iter() - .filter_map(|e| e.ok()) - .filter(|e| e.file_type().is_file()) - .filter(|e| e.path().extension().map(|ext| ext == "md").unwrap_or(false)) - { - let path = entry.path(); - let rel_path = path.strip_prefix(&root).unwrap_or(path); - let id = rel_path.to_string_lossy().replace('\\', "/"); - - match std::fs::read_to_string(path) { - Ok(content) => { - let (frontmatter, body_offset) = extract_frontmatter(&content) - .map(|(fm, off)| (Some(fm), off)) - .unwrap_or((None, 0)); - - let body = &content[body_offset..]; - let wikilinks = extract_wikilinks(body); - let outgoing: Vec = wikilinks.iter().map(|l| l.target.clone()).collect(); - let block_refs: Vec = - wikilinks.iter().filter_map(|l| l.anchor.clone()).collect(); - - let title = frontmatter.as_ref().and_then(|fm| fm.title.clone()).or_else(|| { - // Fallback: first H1 heading - body.lines() - .find_map(|l| l.trim().strip_prefix("# ").map(|s| s.trim().to_string())) - }); - - let tags = frontmatter.as_ref().map(|fm| fm.tags.clone()).unwrap_or_default(); - let linked_repo = frontmatter.as_ref().and_then(|fm| fm.repo.clone()); - 
let fm_raw = frontmatter.map(|fm| fm.raw); - - let note = VaultNote { - id, - path: path.to_string_lossy().to_string(), - title, - content: body.trim().to_string(), - frontmatter: fm_raw, - tags, - outgoing_links: outgoing, - block_refs, - linked_repo, - created_at: Utc::now(), - updated_at: Utc::now(), - }; - - if let Err(e) = crate::registry::vault::save_vault_note(conn, ¬e) { - warn!("Failed to save vault note {}: {}", note.id, e); - } else { - synced += 1; - } - } - Err(e) => { - warn!("Failed to read vault file {:?}: {}", path, e); - } - } - } - - info!("Vault scan complete: {} notes synced", synced); - Ok(synced) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::registry::WorkspaceRegistry; - - #[test] - fn test_scan_vault_basic() { - let tmp = std::env::temp_dir().join(format!("devbase_vault_scan_{}", std::process::id())); - std::fs::create_dir_all(&tmp).unwrap(); - std::fs::write( - tmp.join("hello.md"), - "---\ntitle: Hello World\ntags: [rust, cli]\n---\n# Hello World\n\nThis is a [[test]] note.\n", - ) - .unwrap(); - - let mut conn = WorkspaceRegistry::init_in_memory().unwrap(); - let count = scan_vault(&mut conn, Some(&tmp)).unwrap(); - assert_eq!(count, 1); - - std::fs::remove_dir_all(&tmp).unwrap(); - } - - #[test] - fn test_scan_vault_empty_dir() { - let tmp = std::env::temp_dir().join(format!("devbase_vault_empty_{}", std::process::id())); - std::fs::create_dir_all(&tmp).unwrap(); - let mut conn = WorkspaceRegistry::init_in_memory().unwrap(); - let count = scan_vault(&mut conn, Some(&tmp)).unwrap(); - assert_eq!(count, 0); - std::fs::remove_dir_all(&tmp).unwrap(); - } - - #[test] - fn test_scan_vault_missing_dir() { - let tmp = - std::env::temp_dir().join(format!("devbase_vault_missing_{}", std::process::id())); - let mut conn = WorkspaceRegistry::init_in_memory().unwrap(); - let count = scan_vault(&mut conn, Some(&tmp)).unwrap(); - assert_eq!(count, 0); - } -} +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 +use 
crate::registry::VaultNote; +use crate::vault::frontmatter::extract_frontmatter; +use crate::vault::wikilink::extract_wikilinks; + +use chrono::Utc; +use std::path::{Path, PathBuf}; +use tracing::{info, warn}; + +fn default_vault_dir() -> anyhow::Result { + let ws = crate::registry::WorkspaceRegistry::workspace_dir()?; + let vault = ws.join("vault"); + // P1-2: PARA directory structure + for sub in &["00-Inbox", "01-Projects", "02-Areas", "03-Resources", "04-Archives", "99-Meta"] { + std::fs::create_dir_all(vault.join(sub))?; + } + Ok(vault) +} + +/// Options for scanning vault directories. +#[derive(Debug, Clone)] +pub struct ScanOptions { + pub roots: Vec, + pub follow_links: bool, +} + +impl Default for ScanOptions { + fn default() -> Self { + Self { + roots: vec![], + follow_links: true, + } + } +} + +/// Scan vault directories for Markdown notes and sync them into the registry. +/// +/// * `options` — scan options (roots, follow_links). If roots is empty, uses the default vault location. +/// * Returns the number of notes synced. +pub fn scan_vault_with_options( + conn: &mut rusqlite::Connection, + options: &ScanOptions, +) -> anyhow::Result { + let roots = if options.roots.is_empty() { + vec![default_vault_dir()?] 
+ } else { + options.roots.clone() + }; + + let mut synced = 0; + + let multi_root = roots.len() > 1; + + for root in &roots { + if !root.exists() { + info!("Vault root does not exist yet: {:?}", root); + continue; + } + + let walker = walkdir::WalkDir::new(root) + .follow_links(options.follow_links); + + for entry in walker + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| e.file_type().is_file()) + .filter(|e| e.path().extension().map(|ext| ext == "md").unwrap_or(false)) + { + let path = entry.path(); + let rel_path = path.strip_prefix(root).unwrap_or(path); + let id = if multi_root { + format!("{}/{}", root.file_name().unwrap_or_default().to_string_lossy(), + rel_path.to_string_lossy().replace('\\', "/")) + } else { + rel_path.to_string_lossy().replace('\\', "/") + }; + + match std::fs::read_to_string(path) { + Ok(content) => { + let (frontmatter, body_offset) = extract_frontmatter(&content) + .map(|(fm, off)| (Some(fm), off)) + .unwrap_or((None, 0)); + + let body = &content[body_offset..]; + let wikilinks = extract_wikilinks(body); + let outgoing: Vec = wikilinks.iter().map(|l| l.target.clone()).collect(); + let block_refs: Vec = + wikilinks.iter().filter_map(|l| l.anchor.clone()).collect(); + + let title = frontmatter.as_ref().and_then(|fm| fm.title.clone()).or_else(|| { + // Fallback: first H1 heading + body.lines() + .find_map(|l| l.trim().strip_prefix("# ").map(|s| s.trim().to_string())) + }); + + let tags = frontmatter.as_ref().map(|fm| fm.tags.clone()).unwrap_or_default(); + let linked_repo = frontmatter.as_ref().and_then(|fm| fm.repo.clone()); + let fm_raw = frontmatter.map(|fm| fm.raw); + + let note = VaultNote { + id, + path: path.to_string_lossy().to_string(), + title, + content: body.trim().to_string(), + frontmatter: fm_raw, + tags, + outgoing_links: outgoing, + block_refs, + linked_repo, + created_at: Utc::now(), + updated_at: Utc::now(), + }; + + if let Err(e) = crate::registry::vault::save_vault_note(conn, ¬e) { + warn!("Failed to save vault 
note {}: {}", note.id, e); + } else { + synced += 1; + } + } + Err(e) => { + warn!("Failed to read vault file {:?}: {}", path, e); + } + } + } + } + + info!("Vault scan complete: {} notes synced", synced); + Ok(synced) +} + +/// Legacy API: scan a single vault directory. +/// +/// * `vault_dir` — root of the vault. If `None`, uses the default location. +/// * Returns the number of notes synced. +pub fn scan_vault( + conn: &mut rusqlite::Connection, + vault_dir: Option<&Path>, +) -> anyhow::Result { + let options = ScanOptions { + roots: vault_dir.map(|p| vec![p.to_path_buf()]).unwrap_or_default(), + follow_links: false, + }; + scan_vault_with_options(conn, &options) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::registry::WorkspaceRegistry; + + #[test] + fn test_scan_vault_basic() { + let tmp = std::env::temp_dir().join(format!("devbase_vault_scan_{}", std::process::id())); + std::fs::create_dir_all(&tmp).unwrap(); + std::fs::write( + tmp.join("hello.md"), + "---\ntitle: Hello World\ntags: [rust, cli]\n---\n# Hello World\n\nThis is a [[test]] note.\n", + ) + .unwrap(); + + let mut conn = WorkspaceRegistry::init_in_memory().unwrap(); + let count = scan_vault(&mut conn, Some(&tmp)).unwrap(); + assert_eq!(count, 1); + + std::fs::remove_dir_all(&tmp).unwrap(); + } + + #[test] + fn test_scan_vault_empty_dir() { + let tmp = std::env::temp_dir().join(format!("devbase_vault_empty_{}", std::process::id())); + std::fs::create_dir_all(&tmp).unwrap(); + let mut conn = WorkspaceRegistry::init_in_memory().unwrap(); + let count = scan_vault(&mut conn, Some(&tmp)).unwrap(); + assert_eq!(count, 0); + std::fs::remove_dir_all(&tmp).unwrap(); + } + + #[test] + fn test_scan_vault_missing_dir() { + let tmp = + std::env::temp_dir().join(format!("devbase_vault_missing_{}", std::process::id())); + let mut conn = WorkspaceRegistry::init_in_memory().unwrap(); + let count = scan_vault(&mut conn, Some(&tmp)).unwrap(); + assert_eq!(count, 0); + } +} From 
627ef0e01755369e0312f358dce2ccf4b4d809c0 Mon Sep 17 00:00:00 2001 From: juice094 Date: Sat, 6 Jun 2026 21:26:19 +0800 Subject: [PATCH 05/11] Phase 3: Ontology import from OpenClaw workspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - import_ontology: reads ontology/entities/*.json + relations/*.jsonl - Maps entity_id/type/name → devbase entities table - Maps relation_id/type/from/to → devbase relations table - devkit_ontology_import MCP tool (Beta tier, destructive) - devbase ontology import CLI command (with --dry-run) - MCP tools: 70 → 71 - All 495 tests pass Co-Authored-By: Claude Opus 4.7 --- server.json | 370 +++--- src/commands/mod.rs | 21 +- src/commands/ontology.rs | 55 + src/main.rs | 12 + src/mcp/mod.rs | 2094 +++++++++++++++--------------- src/mcp/tests.rs | 1964 ++++++++++++++-------------- src/mcp/tools/mod.rs | 164 +-- src/mcp/tools/ontology_import.rs | 69 + src/registry.rs | 1075 +++++++-------- src/registry/import_ontology.rs | 197 +++ 10 files changed, 3183 insertions(+), 2838 deletions(-) create mode 100644 src/commands/ontology.rs create mode 100644 src/mcp/tools/ontology_import.rs create mode 100644 src/registry/import_ontology.rs diff --git a/server.json b/server.json index 05d8349..75802e0 100644 --- a/server.json +++ b/server.json @@ -1,184 +1,186 @@ -{ - "$schema": "https://registry.modelcontextprotocol.io/schema/server.json", - "name": "io.github.juice094.devbase", - "version": "0.20.1", - "description": "Developer Knowledge OS — manage Git repos, vault notes (Markdown), and assets. 
AI-native workspace with 70 MCP tools.", - "license": "AGPL-3.0-or-later", - "homepage": "https://github.com/juice094/devbase", - "repository": { - "type": "git", - "url": "https://github.com/juice094/devbase.git" - }, - "maintainers": [ - { - "name": "juice094", - "email": "160722440+juice094@users.noreply.github.com" - } - ], - "categories": ["developer-tools", "knowledge-management", "version-control"], - "tags": ["git", "rust", "vault", "notes", "mcp"], - "runtime": { - "type": "stdio", - "command": "devbase", - "args": ["mcp"] - }, - "installation": { - "cargo": { - "crate": "devbase", - "bin": "devbase" - } - }, - "capabilities": { - "tools": { - "stable": [ - "devkit_health", - "devkit_query_repos", - "devkit_vault_search", - "devkit_vault_read", - "devkit_project_context" - ], - "beta": [ - "devkit_scan", - "devkit_sync", - "devkit_query", - "devkit_index", - "devkit_index_stream", - "devkit_status", - "devkit_note", - "devkit_digest", - "devkit_paper_index", - "devkit_experiment_log", - "devkit_github_info", - "devkit_arxiv_fetch", - "devkit_code_metrics", - "devkit_module_graph", - "devkit_code_symbols", - "devkit_dependency_graph", - "devkit_call_graph", - "devkit_dead_code", - "devkit_semantic_search", - "devkit_embedding_store", - "devkit_embedding_search", - "devkit_natural_language_query", - "devkit_vault_write", - "devkit_vault_backlinks", - "devkit_vault_daily", - "devkit_vault_graph", - "devkit_vault_export", - "devkit_vault_history", - "devkit_project_brief", - "devkit_impact_analysis", - "devkit_cross_repo_search", - "devkit_knowledge_report", - "devkit_related_symbols", - "devkit_hybrid_search", - "devkit_search_quality", - "devkit_skill_list", - "devkit_skill_search", - "devkit_skill_run", - "devkit_skill_discover", - "devkit_skill_sync", - "devkit_known_limit_store", - "devkit_known_limit_list", - "devkit_relation_store", - "devkit_relation_query", - "devkit_relation_delete", - "devkit_workflow_list", - "devkit_workflow_run", - 
"devkit_workflow_status", - "devkit_session_save", - "devkit_session_list", - "devkit_session_resume", - "devkit_session_attach", - "devkit_session_detach", - "devkit_session_activate", - "devkit_session_search", - "devkit_session_capture", - "devkit_session_workflows", - "devkit_session_recall", - "devkit_session_index", - "devkit_session_export", - "devkit_session_import", - "devkit_oplog_query", - "devkit_evaluate", - "devkit_document_convert", - "devkit_index_health" - ], - "experimental": [] - } - }, - "tools": { - "count": 70, - "list": [ - "devkit_scan", - "devkit_health", - "devkit_sync", - "devkit_query", - "devkit_query_repos", - "devkit_index", - "devkit_index_stream", - "devkit_status", - "devkit_note", - "devkit_digest", - "devkit_paper_index", - "devkit_experiment_log", - "devkit_github_info", - "devkit_arxiv_fetch", - "devkit_code_metrics", - "devkit_module_graph", - "devkit_code_symbols", - "devkit_dependency_graph", - "devkit_call_graph", - "devkit_dead_code", - "devkit_semantic_search", - "devkit_embedding_store", - "devkit_embedding_search", - "devkit_natural_language_query", - "devkit_vault_search", - "devkit_vault_read", - "devkit_vault_write", - "devkit_vault_backlinks", - "devkit_vault_daily", - "devkit_vault_graph", - "devkit_vault_export", - "devkit_vault_history", - "devkit_project_context", - "devkit_project_brief", - "devkit_impact_analysis", - "devkit_cross_repo_search", - "devkit_knowledge_report", - "devkit_related_symbols", - "devkit_hybrid_search", - "devkit_search_quality", - "devkit_skill_list", - "devkit_skill_search", - "devkit_skill_run", - "devkit_skill_discover", - "devkit_known_limit_store", - "devkit_known_limit_list", - "devkit_relation_store", - "devkit_relation_query", - "devkit_relation_delete", - "devkit_workflow_list", - "devkit_workflow_run", - "devkit_workflow_status", - "devkit_session_save", - "devkit_session_list", - "devkit_session_resume", - "devkit_session_attach", - "devkit_session_detach", - 
"devkit_session_activate", - "devkit_session_search", - "devkit_session_capture", - "devkit_session_workflows", - "devkit_session_recall", - "devkit_session_index", - "devkit_session_export", - "devkit_session_import", - "devkit_oplog_query", - "devkit_evaluate", - "devkit_document_convert", - "devkit_index_health" - ] - } -} +{ + "$schema": "https://registry.modelcontextprotocol.io/schema/server.json", + "name": "io.github.juice094.devbase", + "version": "0.20.1", + "description": "Developer Knowledge OS — manage Git repos, vault notes (Markdown), and assets. AI-native workspace with 71 MCP tools.", + "license": "AGPL-3.0-or-later", + "homepage": "https://github.com/juice094/devbase", + "repository": { + "type": "git", + "url": "https://github.com/juice094/devbase.git" + }, + "maintainers": [ + { + "name": "juice094", + "email": "160722440+juice094@users.noreply.github.com" + } + ], + "categories": ["developer-tools", "knowledge-management", "version-control"], + "tags": ["git", "rust", "vault", "notes", "mcp"], + "runtime": { + "type": "stdio", + "command": "devbase", + "args": ["mcp"] + }, + "installation": { + "cargo": { + "crate": "devbase", + "bin": "devbase" + } + }, + "capabilities": { + "tools": { + "stable": [ + "devkit_health", + "devkit_query_repos", + "devkit_vault_search", + "devkit_vault_read", + "devkit_project_context" + ], + "beta": [ + "devkit_scan", + "devkit_sync", + "devkit_query", + "devkit_index", + "devkit_index_stream", + "devkit_status", + "devkit_note", + "devkit_digest", + "devkit_paper_index", + "devkit_experiment_log", + "devkit_github_info", + "devkit_arxiv_fetch", + "devkit_code_metrics", + "devkit_module_graph", + "devkit_code_symbols", + "devkit_dependency_graph", + "devkit_call_graph", + "devkit_dead_code", + "devkit_semantic_search", + "devkit_embedding_store", + "devkit_embedding_search", + "devkit_natural_language_query", + "devkit_vault_write", + "devkit_vault_backlinks", + "devkit_vault_daily", + "devkit_vault_graph", + 
"devkit_vault_export", + "devkit_vault_history", + "devkit_project_brief", + "devkit_impact_analysis", + "devkit_cross_repo_search", + "devkit_knowledge_report", + "devkit_related_symbols", + "devkit_hybrid_search", + "devkit_search_quality", + "devkit_skill_list", + "devkit_skill_search", + "devkit_skill_run", + "devkit_skill_discover", + "devkit_skill_sync", + "devkit_known_limit_store", + "devkit_known_limit_list", + "devkit_relation_store", + "devkit_relation_query", + "devkit_relation_delete", + "devkit_workflow_list", + "devkit_workflow_run", + "devkit_workflow_status", + "devkit_session_save", + "devkit_session_list", + "devkit_session_resume", + "devkit_session_attach", + "devkit_session_detach", + "devkit_session_activate", + "devkit_session_search", + "devkit_session_capture", + "devkit_session_workflows", + "devkit_session_recall", + "devkit_session_index", + "devkit_session_export", + "devkit_session_import", + "devkit_oplog_query", + "devkit_evaluate", + "devkit_document_convert", + "devkit_ontology_import", + "devkit_index_health" + ], + "experimental": [] + } + }, + "tools": { + "count": 71, + "list": [ + "devkit_scan", + "devkit_health", + "devkit_sync", + "devkit_query", + "devkit_query_repos", + "devkit_index", + "devkit_index_stream", + "devkit_status", + "devkit_note", + "devkit_digest", + "devkit_paper_index", + "devkit_experiment_log", + "devkit_github_info", + "devkit_arxiv_fetch", + "devkit_code_metrics", + "devkit_module_graph", + "devkit_code_symbols", + "devkit_dependency_graph", + "devkit_call_graph", + "devkit_dead_code", + "devkit_semantic_search", + "devkit_embedding_store", + "devkit_embedding_search", + "devkit_natural_language_query", + "devkit_vault_search", + "devkit_vault_read", + "devkit_vault_write", + "devkit_vault_backlinks", + "devkit_vault_daily", + "devkit_vault_graph", + "devkit_vault_export", + "devkit_vault_history", + "devkit_project_context", + "devkit_project_brief", + "devkit_impact_analysis", + 
"devkit_cross_repo_search", + "devkit_knowledge_report", + "devkit_related_symbols", + "devkit_hybrid_search", + "devkit_search_quality", + "devkit_skill_list", + "devkit_skill_search", + "devkit_skill_run", + "devkit_skill_discover", + "devkit_known_limit_store", + "devkit_known_limit_list", + "devkit_relation_store", + "devkit_relation_query", + "devkit_relation_delete", + "devkit_workflow_list", + "devkit_workflow_run", + "devkit_workflow_status", + "devkit_session_save", + "devkit_session_list", + "devkit_session_resume", + "devkit_session_attach", + "devkit_session_detach", + "devkit_session_activate", + "devkit_session_search", + "devkit_session_capture", + "devkit_session_workflows", + "devkit_session_recall", + "devkit_session_index", + "devkit_session_export", + "devkit_session_import", + "devkit_oplog_query", + "devkit_evaluate", + "devkit_document_convert", + "devkit_ontology_import", + "devkit_index_health" + ] + } +} diff --git a/src/commands/mod.rs b/src/commands/mod.rs index 85e5d6d..666565a 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1,10 +1,11 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2026 juice094 -pub mod analysis; -pub mod knowledge; -pub mod limit; -pub mod repo; -pub mod simple; -pub mod skill; -pub mod system; -pub mod workflow; +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 +pub mod analysis; +pub mod knowledge; +pub mod ontology; +pub mod limit; +pub mod repo; +pub mod simple; +pub mod skill; +pub mod system; +pub mod workflow; diff --git a/src/commands/ontology.rs b/src/commands/ontology.rs new file mode 100644 index 0000000..1762ded --- /dev/null +++ b/src/commands/ontology.rs @@ -0,0 +1,55 @@ +/// Import ontology from an OpenClaw-compatible workspace into devbase registry. 
+pub fn run_import( + ctx: &mut crate::storage::AppContext, + workspace: &str, + dry_run: bool, +) -> anyhow::Result<()> { + let wp = if workspace.is_empty() { + dirs::home_dir() + .unwrap_or_default() + .join(".kimi_openclaw") + .join("workspace") + } else { + std::path::PathBuf::from(workspace) + }; + + if dry_run { + println!("Dry-run: would import ontology from {}", wp.display()); + if wp.exists() { + let entities_dir = wp.join("ontology").join("entities"); + let relations_file = wp.join("ontology").join("relations").join("core-relations.jsonl"); + if entities_dir.is_dir() { + let count = std::fs::read_dir(&entities_dir)?.count(); + println!(" Entities found: {}", count); + } + if relations_file.exists() { + let lines = std::fs::read_to_string(&relations_file)? + .lines() + .filter(|l| !l.trim().is_empty()) + .count(); + println!(" Relations found: {}", lines); + } + } + return Ok(()); + } + + let conn = ctx.conn()?; + let stats = crate::registry::import_ontology::import_ontology(&conn, &wp)?; + + println!("Ontology import from: {}", wp.display()); + println!(" Entities: {} added, {} updated", stats.entities_added, stats.entities_updated); + println!(" Relations: {} added, {} updated", stats.relations_added, stats.relations_updated); + if !stats.errors.is_empty() { + println!(" Errors: {}", stats.errors.len()); + for e in &stats.errors { + println!(" - {}", e); + } + } + println!( + " Total: {} entities, {} relations", + stats.entities_added + stats.entities_updated, + stats.relations_added + stats.relations_updated, + ); + + Ok(()) +} diff --git a/src/main.rs b/src/main.rs index 4d35c6c..b649946 100644 --- a/src/main.rs +++ b/src/main.rs @@ -302,6 +302,15 @@ pub(crate) enum Commands { #[command(subcommand)] cmd: SkillCommands, }, + /// Import ontology entities and relations from an OpenClaw workspace + Ontology { + /// Path to OpenClaw workspace (defaults to ~/.kimi_openclaw/workspace) + #[arg(default_value = "")] + workspace: String, + /// Dry-run: list 
entities/relations without importing + #[arg(long)] + dry_run: bool, + }, /// Workflow Engine — orchestrate multi-Skill pipelines Workflow { #[command(subcommand)] @@ -804,6 +813,9 @@ async fn main() -> anyhow::Result<()> { Commands::Skill { cmd } => { commands::skill::run_skill(&mut ctx, cmd)?; } + Commands::Ontology { workspace, dry_run } => { + commands::ontology::run_import(&mut ctx, &workspace, dry_run)?; + } Commands::Workflow { cmd } => { commands::workflow::run_workflow(&mut ctx, cmd)?; } diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index f352d29..d39c255 100644 --- a/src/mcp/mod.rs +++ b/src/mcp/mod.rs @@ -1,1044 +1,1050 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2026 juice094 -use std::collections::{HashMap, HashSet}; -use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader}; - -pub use tools::*; - -/// Phase of a streaming tool invocation. -#[derive(Debug, Clone, serde::Serialize)] -#[serde(rename_all = "snake_case")] -pub enum StreamPhase { - /// Progress update (e.g., "Indexing repo 3/10..."). - Progress, - /// Intermediate partial result. - Partial, - /// Final result — stream ends after this. - Done, - /// Error occurred — stream ends after this. - Error, -} - -/// A single event in a streaming tool invocation. -#[derive(Debug, Clone, serde::Serialize)] -pub struct ToolStreamEvent { - pub phase: StreamPhase, - pub payload: serde_json::Value, -} - -#[allow(async_fn_in_trait)] -pub trait McpTool: Send + Sync + Clone { - fn name(&self) -> &'static str; - fn schema(&self) -> serde_json::Value; - async fn invoke( - &self, - args: serde_json::Value, - ctx: &mut crate::storage::AppContext, - ) -> anyhow::Result; - - /// Optional streaming interface for long-running operations. - /// - /// Default implementation delegates to `invoke` and emits a single `Done` event. - /// Override this for tools that support progressive output (e.g., indexing, - /// syncing large batches, or long-running analysis). 
- async fn invoke_stream( - &self, - args: serde_json::Value, - ctx: &mut crate::storage::AppContext, - ) -> anyhow::Result> { - let result = self.invoke(args, ctx).await?; - Ok(vec![ToolStreamEvent { - phase: StreamPhase::Done, - payload: result, - }]) - } -} - -#[derive(Clone)] -pub enum McpToolEnum { - Scan(DevkitScanTool), - Health(DevkitHealthTool), - Sync(DevkitSyncTool), - Query(DevkitQueryTool), - QueryRepos(DevkitQueryReposTool), - Index(DevkitIndexTool), - IndexHealth(DevkitIndexHealthTool), - IndexStream(DevkitIndexStreamTool), - Note(DevkitNoteTool), - Status(DevkitStatusTool), - Digest(DevkitDigestTool), - Paper(DevkitPaperIndexTool), - Experiment(DevkitExperimentLogTool), - GithubInfo(DevkitGithubInfoTool), - CodeMetrics(DevkitCodeMetricsTool), - ModuleGraph(DevkitModuleGraphTool), - NaturalLanguageQuery(DevkitNaturalLanguageQueryTool), - VaultSearch(DevkitVaultSearchTool), - VaultRead(DevkitVaultReadTool), - VaultWrite(DevkitVaultWriteTool), - VaultBacklinks(DevkitVaultBacklinksTool), - VaultDaily(DevkitVaultDailyTool), - VaultGraph(DevkitVaultGraphTool), - VaultExport(DevkitVaultExportTool), - VaultHistory(DevkitVaultHistoryTool), - ProjectContext(DevkitProjectContextTool), - ProjectBrief(DevkitProjectBriefTool), - ImpactAnalysis(DevkitImpactAnalysisTool), - CodeSymbols(DevkitCodeSymbolsTool), - DependencyGraph(DevkitDependencyGraphTool), - CallGraph(DevkitCallGraphTool), - DeadCode(DevkitDeadCodeTool), - SemanticSearch(DevkitSemanticSearchTool), - ArxivFetch(DevkitArxivFetchTool), - EmbeddingStore(DevkitEmbeddingStoreTool), - EmbeddingSearch(DevkitEmbeddingSearchTool), - CrossRepoSearch(DevkitCrossRepoSearchTool), - KnowledgeReport(DevkitKnowledgeReportTool), - RelatedSymbols(DevkitRelatedSymbolsTool), - HybridSearch(DevkitHybridSearchTool), - SearchQuality(DevkitSearchQualityTool), - SkillList(DevkitSkillListTool), - SkillSearch(DevkitSkillSearchTool), - SkillRun(DevkitSkillRunTool), - SkillDiscover(DevkitSkillDiscoverTool), - 
SkillSync(DevkitSkillSyncTool), - KnownLimitStore(DevkitKnownLimitStoreTool), - KnownLimitList(DevkitKnownLimitListTool), - RelationStore(DevkitRelationStoreTool), - RelationQuery(DevkitRelationQueryTool), - RelationDelete(DevkitRelationDeleteTool), - SessionSave(DevkitSessionSaveTool), - SessionList(DevkitSessionListTool), - SessionResume(DevkitSessionResumeTool), - SessionAttach(DevkitSessionAttachTool), - SessionDetach(DevkitSessionDetachTool), - SessionActivate(DevkitSessionActivateTool), - SessionSearch(DevkitSessionSearchTool), - SessionCapture(DevkitSessionCaptureTool), - SessionWorkflows(DevkitSessionWorkflowsTool), - SessionRecall(DevkitSessionRecallTool), - SessionIndex(DevkitSessionIndexTool), - SessionExport(DevkitSessionExportTool), - SessionImport(DevkitSessionImportTool), - WorkflowList(DevkitWorkflowListTool), - WorkflowRun(DevkitWorkflowRunTool), - WorkflowStatus(DevkitWorkflowStatusTool), - OplogQuery(DevkitOplogQueryTool), - Evaluate(DevkitEvaluateTool), - DocumentConvert(DevkitDocumentConvertTool), -} - -/// Stability tier for MCP tools. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum ToolTier { - Stable, - Beta, - Experimental, -} - -impl std::str::FromStr for ToolTier { - type Err = (); - fn from_str(s: &str) -> Result { - match s.to_lowercase().as_str() { - "stable" => Ok(ToolTier::Stable), - "beta" => Ok(ToolTier::Beta), - "experimental" => Ok(ToolTier::Experimental), - _ => Err(()), - } - } -} - -impl McpToolEnum { - pub fn tier(&self) -> ToolTier { - match self { - // Stable: battle-tested, schema frozen, unit-tested - McpToolEnum::Health(_) => ToolTier::Stable, - McpToolEnum::QueryRepos(_) => ToolTier::Stable, - McpToolEnum::VaultSearch(_) => ToolTier::Stable, - McpToolEnum::VaultRead(_) => ToolTier::Stable, - McpToolEnum::ProjectContext(_) => ToolTier::Stable, - McpToolEnum::ProjectBrief(_) => ToolTier::Beta, - McpToolEnum::ImpactAnalysis(_) => ToolTier::Beta, - // Beta: validated but schema may微调, limited edge-case tests - McpToolEnum::Scan(_) => ToolTier::Beta, - McpToolEnum::Sync(_) => ToolTier::Beta, - McpToolEnum::Query(_) => ToolTier::Beta, - McpToolEnum::Index(_) => ToolTier::Beta, - McpToolEnum::IndexHealth(_) => ToolTier::Beta, - McpToolEnum::IndexStream(_) => ToolTier::Beta, - McpToolEnum::Status(_) => ToolTier::Beta, - McpToolEnum::Note(_) => ToolTier::Beta, - McpToolEnum::VaultWrite(_) => ToolTier::Beta, - McpToolEnum::VaultBacklinks(_) => ToolTier::Beta, - McpToolEnum::VaultDaily(_) => ToolTier::Beta, - McpToolEnum::VaultGraph(_) => ToolTier::Beta, - McpToolEnum::VaultExport(_) => ToolTier::Beta, - McpToolEnum::VaultHistory(_) => ToolTier::Beta, - McpToolEnum::NaturalLanguageQuery(_) => ToolTier::Beta, - McpToolEnum::GithubInfo(_) => ToolTier::Beta, - // Experimental: new, behavior may change, pending prod validation - McpToolEnum::Digest(_) => ToolTier::Experimental, - McpToolEnum::Paper(_) => ToolTier::Experimental, - McpToolEnum::Experiment(_) => ToolTier::Beta, - McpToolEnum::CodeMetrics(_) => ToolTier::Beta, - McpToolEnum::ModuleGraph(_) => ToolTier::Beta, - 
McpToolEnum::CodeSymbols(_) => ToolTier::Beta, - McpToolEnum::DependencyGraph(_) => ToolTier::Beta, - McpToolEnum::CallGraph(_) => ToolTier::Beta, - McpToolEnum::DeadCode(_) => ToolTier::Beta, - McpToolEnum::SemanticSearch(_) => ToolTier::Beta, - McpToolEnum::ArxivFetch(_) => ToolTier::Beta, - McpToolEnum::EmbeddingStore(_) => ToolTier::Beta, - McpToolEnum::EmbeddingSearch(_) => ToolTier::Beta, - McpToolEnum::CrossRepoSearch(_) => ToolTier::Beta, - McpToolEnum::KnowledgeReport(_) => ToolTier::Beta, - McpToolEnum::RelatedSymbols(_) => ToolTier::Experimental, - McpToolEnum::HybridSearch(_) => ToolTier::Beta, - McpToolEnum::SearchQuality(_) => ToolTier::Beta, - McpToolEnum::SkillList(_) => ToolTier::Beta, - McpToolEnum::SkillSearch(_) => ToolTier::Beta, - McpToolEnum::SkillRun(_) => ToolTier::Beta, - McpToolEnum::SkillDiscover(_) => ToolTier::Beta, - McpToolEnum::SkillSync(_) => ToolTier::Beta, - McpToolEnum::KnownLimitStore(_) => ToolTier::Beta, - McpToolEnum::KnownLimitList(_) => ToolTier::Beta, - McpToolEnum::RelationStore(_) => ToolTier::Beta, - McpToolEnum::RelationQuery(_) => ToolTier::Beta, - McpToolEnum::RelationDelete(_) => ToolTier::Beta, - McpToolEnum::SessionSave(_) => ToolTier::Beta, - McpToolEnum::SessionList(_) => ToolTier::Beta, - McpToolEnum::SessionResume(_) => ToolTier::Beta, - McpToolEnum::SessionAttach(_) => ToolTier::Beta, - McpToolEnum::SessionDetach(_) => ToolTier::Beta, - McpToolEnum::SessionActivate(_) => ToolTier::Beta, - McpToolEnum::SessionSearch(_) => ToolTier::Beta, - McpToolEnum::SessionCapture(_) => ToolTier::Beta, - McpToolEnum::SessionWorkflows(_) => ToolTier::Beta, - McpToolEnum::SessionRecall(_) => ToolTier::Experimental, - McpToolEnum::SessionIndex(_) => ToolTier::Experimental, - McpToolEnum::SessionExport(_) => ToolTier::Experimental, - McpToolEnum::SessionImport(_) => ToolTier::Experimental, - McpToolEnum::WorkflowList(_) => ToolTier::Beta, - McpToolEnum::WorkflowRun(_) => ToolTier::Beta, - McpToolEnum::WorkflowStatus(_) => 
ToolTier::Beta, - McpToolEnum::OplogQuery(_) => ToolTier::Beta, - McpToolEnum::Evaluate(_) => ToolTier::Beta, - McpToolEnum::DocumentConvert(_) => ToolTier::Experimental, - } - } -} - -impl McpTool for McpToolEnum { - fn name(&self) -> &'static str { - match self { - McpToolEnum::Scan(t) => t.name(), - McpToolEnum::Health(t) => t.name(), - McpToolEnum::Sync(t) => t.name(), - McpToolEnum::Query(t) => t.name(), - McpToolEnum::QueryRepos(t) => t.name(), - McpToolEnum::Index(t) => t.name(), - McpToolEnum::IndexHealth(t) => t.name(), - McpToolEnum::IndexStream(t) => t.name(), - McpToolEnum::Status(t) => t.name(), - McpToolEnum::Note(t) => t.name(), - McpToolEnum::Digest(t) => t.name(), - McpToolEnum::Paper(t) => t.name(), - McpToolEnum::Experiment(t) => t.name(), - McpToolEnum::GithubInfo(t) => t.name(), - McpToolEnum::CodeMetrics(t) => t.name(), - McpToolEnum::ModuleGraph(t) => t.name(), - McpToolEnum::NaturalLanguageQuery(t) => t.name(), - McpToolEnum::VaultSearch(t) => t.name(), - McpToolEnum::VaultRead(t) => t.name(), - McpToolEnum::VaultWrite(t) => t.name(), - McpToolEnum::VaultBacklinks(t) => t.name(), - McpToolEnum::VaultDaily(t) => t.name(), - McpToolEnum::VaultGraph(t) => t.name(), - McpToolEnum::VaultExport(t) => t.name(), - McpToolEnum::VaultHistory(t) => t.name(), - McpToolEnum::ProjectContext(t) => t.name(), - McpToolEnum::ProjectBrief(t) => t.name(), - McpToolEnum::ImpactAnalysis(t) => t.name(), - McpToolEnum::CodeSymbols(t) => t.name(), - McpToolEnum::DependencyGraph(t) => t.name(), - McpToolEnum::CallGraph(t) => t.name(), - McpToolEnum::DeadCode(t) => t.name(), - McpToolEnum::SemanticSearch(t) => t.name(), - McpToolEnum::ArxivFetch(t) => t.name(), - McpToolEnum::EmbeddingStore(t) => t.name(), - McpToolEnum::EmbeddingSearch(t) => t.name(), - McpToolEnum::CrossRepoSearch(t) => t.name(), - McpToolEnum::KnowledgeReport(t) => t.name(), - McpToolEnum::RelatedSymbols(t) => t.name(), - McpToolEnum::HybridSearch(t) => t.name(), - McpToolEnum::SearchQuality(t) => 
t.name(), - McpToolEnum::SkillList(t) => t.name(), - McpToolEnum::SkillSearch(t) => t.name(), - McpToolEnum::SkillRun(t) => t.name(), - McpToolEnum::SkillDiscover(t) => t.name(), - McpToolEnum::SkillSync(t) => t.name(), - McpToolEnum::KnownLimitStore(t) => t.name(), - McpToolEnum::KnownLimitList(t) => t.name(), - McpToolEnum::RelationStore(t) => t.name(), - McpToolEnum::RelationQuery(t) => t.name(), - McpToolEnum::RelationDelete(t) => t.name(), - McpToolEnum::SessionSave(t) => t.name(), - McpToolEnum::SessionList(t) => t.name(), - McpToolEnum::SessionResume(t) => t.name(), - McpToolEnum::SessionAttach(t) => t.name(), - McpToolEnum::SessionDetach(t) => t.name(), - McpToolEnum::SessionActivate(t) => t.name(), - McpToolEnum::SessionSearch(t) => t.name(), - McpToolEnum::SessionCapture(t) => t.name(), - McpToolEnum::SessionWorkflows(t) => t.name(), - McpToolEnum::SessionRecall(t) => t.name(), - McpToolEnum::SessionIndex(t) => t.name(), - McpToolEnum::SessionExport(t) => t.name(), - McpToolEnum::SessionImport(t) => t.name(), - McpToolEnum::WorkflowList(t) => t.name(), - McpToolEnum::WorkflowRun(t) => t.name(), - McpToolEnum::WorkflowStatus(t) => t.name(), - McpToolEnum::OplogQuery(t) => t.name(), - McpToolEnum::Evaluate(t) => t.name(), - McpToolEnum::DocumentConvert(t) => t.name(), - } - } - - fn schema(&self) -> serde_json::Value { - match self { - McpToolEnum::Scan(t) => t.schema(), - McpToolEnum::Health(t) => t.schema(), - McpToolEnum::Sync(t) => t.schema(), - McpToolEnum::Query(t) => t.schema(), - McpToolEnum::QueryRepos(t) => t.schema(), - McpToolEnum::Index(t) => t.schema(), - McpToolEnum::IndexHealth(t) => t.schema(), - McpToolEnum::IndexStream(t) => t.schema(), - McpToolEnum::Status(t) => t.schema(), - McpToolEnum::Note(t) => t.schema(), - McpToolEnum::Digest(t) => t.schema(), - McpToolEnum::Paper(t) => t.schema(), - McpToolEnum::Experiment(t) => t.schema(), - McpToolEnum::GithubInfo(t) => t.schema(), - McpToolEnum::CodeMetrics(t) => t.schema(), - 
McpToolEnum::ModuleGraph(t) => t.schema(), - McpToolEnum::NaturalLanguageQuery(t) => t.schema(), - McpToolEnum::VaultSearch(t) => t.schema(), - McpToolEnum::VaultRead(t) => t.schema(), - McpToolEnum::VaultWrite(t) => t.schema(), - McpToolEnum::VaultBacklinks(t) => t.schema(), - McpToolEnum::VaultDaily(t) => t.schema(), - McpToolEnum::VaultGraph(t) => t.schema(), - McpToolEnum::VaultExport(t) => t.schema(), - McpToolEnum::VaultHistory(t) => t.schema(), - McpToolEnum::ProjectContext(t) => t.schema(), - McpToolEnum::ProjectBrief(t) => t.schema(), - McpToolEnum::ImpactAnalysis(t) => t.schema(), - McpToolEnum::CodeSymbols(t) => t.schema(), - McpToolEnum::DependencyGraph(t) => t.schema(), - McpToolEnum::CallGraph(t) => t.schema(), - McpToolEnum::DeadCode(t) => t.schema(), - McpToolEnum::SemanticSearch(t) => t.schema(), - McpToolEnum::ArxivFetch(t) => t.schema(), - McpToolEnum::EmbeddingStore(t) => t.schema(), - McpToolEnum::EmbeddingSearch(t) => t.schema(), - McpToolEnum::CrossRepoSearch(t) => t.schema(), - McpToolEnum::KnowledgeReport(t) => t.schema(), - McpToolEnum::RelatedSymbols(t) => t.schema(), - McpToolEnum::HybridSearch(t) => t.schema(), - McpToolEnum::SearchQuality(t) => t.schema(), - McpToolEnum::SkillList(t) => t.schema(), - McpToolEnum::SkillSearch(t) => t.schema(), - McpToolEnum::SkillRun(t) => t.schema(), - McpToolEnum::SkillDiscover(t) => t.schema(), - McpToolEnum::SkillSync(t) => t.schema(), - McpToolEnum::KnownLimitStore(t) => t.schema(), - McpToolEnum::KnownLimitList(t) => t.schema(), - McpToolEnum::RelationStore(t) => t.schema(), - McpToolEnum::RelationQuery(t) => t.schema(), - McpToolEnum::RelationDelete(t) => t.schema(), - McpToolEnum::SessionSave(t) => t.schema(), - McpToolEnum::SessionList(t) => t.schema(), - McpToolEnum::SessionResume(t) => t.schema(), - McpToolEnum::SessionAttach(t) => t.schema(), - McpToolEnum::SessionDetach(t) => t.schema(), - McpToolEnum::SessionActivate(t) => t.schema(), - McpToolEnum::SessionSearch(t) => t.schema(), - 
McpToolEnum::SessionCapture(t) => t.schema(), - McpToolEnum::SessionWorkflows(t) => t.schema(), - McpToolEnum::SessionRecall(t) => t.schema(), - McpToolEnum::SessionIndex(t) => t.schema(), - McpToolEnum::SessionExport(t) => t.schema(), - McpToolEnum::SessionImport(t) => t.schema(), - McpToolEnum::WorkflowList(t) => t.schema(), - McpToolEnum::WorkflowRun(t) => t.schema(), - McpToolEnum::WorkflowStatus(t) => t.schema(), - McpToolEnum::OplogQuery(t) => t.schema(), - McpToolEnum::Evaluate(t) => t.schema(), - McpToolEnum::DocumentConvert(t) => t.schema(), - } - } - - async fn invoke( - &self, - args: serde_json::Value, - ctx: &mut crate::storage::AppContext, - ) -> anyhow::Result { - match self { - McpToolEnum::Scan(t) => t.invoke(args, ctx).await, - McpToolEnum::Health(t) => t.invoke(args, ctx).await, - McpToolEnum::Sync(t) => t.invoke(args, ctx).await, - McpToolEnum::Query(t) => t.invoke(args, ctx).await, - McpToolEnum::QueryRepos(t) => t.invoke(args, ctx).await, - McpToolEnum::Index(t) => t.invoke(args, ctx).await, - McpToolEnum::IndexHealth(t) => t.invoke(args, ctx).await, - McpToolEnum::IndexStream(t) => t.invoke(args, ctx).await, - McpToolEnum::Status(t) => t.invoke(args, ctx).await, - McpToolEnum::Note(t) => t.invoke(args, ctx).await, - McpToolEnum::Digest(t) => t.invoke(args, ctx).await, - McpToolEnum::Paper(t) => t.invoke(args, ctx).await, - McpToolEnum::Experiment(t) => t.invoke(args, ctx).await, - McpToolEnum::GithubInfo(t) => t.invoke(args, ctx).await, - McpToolEnum::CodeMetrics(t) => t.invoke(args, ctx).await, - McpToolEnum::ModuleGraph(t) => t.invoke(args, ctx).await, - McpToolEnum::NaturalLanguageQuery(t) => t.invoke(args, ctx).await, - McpToolEnum::VaultSearch(t) => t.invoke(args, ctx).await, - McpToolEnum::VaultRead(t) => t.invoke(args, ctx).await, - McpToolEnum::VaultWrite(t) => t.invoke(args, ctx).await, - McpToolEnum::VaultBacklinks(t) => t.invoke(args, ctx).await, - McpToolEnum::VaultDaily(t) => t.invoke(args, ctx).await, - 
McpToolEnum::VaultGraph(t) => t.invoke(args, ctx).await, - McpToolEnum::VaultExport(t) => t.invoke(args, ctx).await, - McpToolEnum::VaultHistory(t) => t.invoke(args, ctx).await, - McpToolEnum::ProjectContext(t) => t.invoke(args, ctx).await, - McpToolEnum::ProjectBrief(t) => t.invoke(args, ctx).await, - McpToolEnum::ImpactAnalysis(t) => t.invoke(args, ctx).await, - McpToolEnum::CodeSymbols(t) => t.invoke(args, ctx).await, - McpToolEnum::DependencyGraph(t) => t.invoke(args, ctx).await, - McpToolEnum::CallGraph(t) => t.invoke(args, ctx).await, - McpToolEnum::DeadCode(t) => t.invoke(args, ctx).await, - McpToolEnum::SemanticSearch(t) => t.invoke(args, ctx).await, - McpToolEnum::ArxivFetch(t) => t.invoke(args, ctx).await, - McpToolEnum::EmbeddingStore(t) => t.invoke(args, ctx).await, - McpToolEnum::EmbeddingSearch(t) => t.invoke(args, ctx).await, - McpToolEnum::CrossRepoSearch(t) => t.invoke(args, ctx).await, - McpToolEnum::KnowledgeReport(t) => t.invoke(args, ctx).await, - McpToolEnum::RelatedSymbols(t) => t.invoke(args, ctx).await, - McpToolEnum::HybridSearch(t) => t.invoke(args, ctx).await, - McpToolEnum::SearchQuality(t) => t.invoke(args, ctx).await, - McpToolEnum::SkillList(t) => t.invoke(args, ctx).await, - McpToolEnum::SkillSearch(t) => t.invoke(args, ctx).await, - McpToolEnum::SkillRun(t) => t.invoke(args, ctx).await, - McpToolEnum::SkillDiscover(t) => t.invoke(args, ctx).await, - McpToolEnum::SkillSync(t) => t.invoke(args, ctx).await, - McpToolEnum::KnownLimitStore(t) => t.invoke(args, ctx).await, - McpToolEnum::KnownLimitList(t) => t.invoke(args, ctx).await, - McpToolEnum::RelationStore(t) => t.invoke(args, ctx).await, - McpToolEnum::RelationQuery(t) => t.invoke(args, ctx).await, - McpToolEnum::RelationDelete(t) => t.invoke(args, ctx).await, - McpToolEnum::SessionSave(t) => t.invoke(args, ctx).await, - McpToolEnum::SessionList(t) => t.invoke(args, ctx).await, - McpToolEnum::SessionResume(t) => t.invoke(args, ctx).await, - McpToolEnum::SessionAttach(t) => 
t.invoke(args, ctx).await, - McpToolEnum::SessionDetach(t) => t.invoke(args, ctx).await, - McpToolEnum::SessionActivate(t) => t.invoke(args, ctx).await, - McpToolEnum::SessionSearch(t) => t.invoke(args, ctx).await, - McpToolEnum::SessionCapture(t) => t.invoke(args, ctx).await, - McpToolEnum::SessionWorkflows(t) => t.invoke(args, ctx).await, - McpToolEnum::SessionRecall(t) => t.invoke(args, ctx).await, - McpToolEnum::SessionIndex(t) => t.invoke(args, ctx).await, - McpToolEnum::SessionExport(t) => t.invoke(args, ctx).await, - McpToolEnum::SessionImport(t) => t.invoke(args, ctx).await, - McpToolEnum::WorkflowList(t) => t.invoke(args, ctx).await, - McpToolEnum::WorkflowRun(t) => t.invoke(args, ctx).await, - McpToolEnum::WorkflowStatus(t) => t.invoke(args, ctx).await, - McpToolEnum::OplogQuery(t) => t.invoke(args, ctx).await, - McpToolEnum::Evaluate(t) => t.invoke(args, ctx).await, - McpToolEnum::DocumentConvert(t) => t.invoke(args, ctx).await, - } - } -} - -/// Long-lived oplog file handle — opened once, reused across all MCP calls. -static OPLOG_FILE: std::sync::OnceLock>> = - std::sync::OnceLock::new(); - -fn get_oplog_file() -> &'static std::sync::Mutex> { - OPLOG_FILE.get_or_init(|| { - let file = dirs::data_local_dir().and_then(|data_dir| { - let log_path = data_dir.join("devbase").join("mcp-oplog.ndjson"); - std::fs::OpenOptions::new().create(true).append(true).open(&log_path).ok() - }); - std::sync::Mutex::new(file) - }) -} - -/// Append a single MCP tool invocation record to the oplog file. 
-/// -/// Path: `%LOCALAPPDATA%/devbase/mcp-oplog.ndjson` -/// Format: newline-delimited JSON (NDJSON) -fn append_mcp_oplog(tool_name: &str, duration_ms: u128, success: bool, error_type: Option<&str>) { - let entry = serde_json::json!({ - "timestamp": chrono::Utc::now().to_rfc3339(), - "tool": tool_name, - "duration_ms": duration_ms, - "success": success, - "error_type": error_type, - }); - - if let Ok(mut guard) = get_oplog_file().lock() { - if let Some(ref mut file) = *guard { - use std::io::Write; - if let Err(e) = writeln!(file, "{}", entry) { - tracing::warn!("Failed to write MCP oplog: {}", e); - } - } - } -} - -pub struct McpServer { - tools: HashMap, -} - -impl Default for McpServer { - fn default() -> Self { - Self::new() - } -} - -impl McpServer { - pub fn new() -> Self { - Self { tools: HashMap::new() } - } - - pub fn register_tool(&mut self, tool: McpToolEnum) { - self.tools.insert(tool.name().to_string(), tool); - } - - pub async fn handle_request( - &self, - req: serde_json::Value, - ctx: &mut crate::storage::AppContext, - ) -> anyhow::Result { - let id = req.get("id").cloned().unwrap_or(serde_json::Value::Null); - let method = req.get("method").and_then(|v| v.as_str()).unwrap_or(""); - - match method { - "ping" => Ok(serde_json::json!({ - "jsonrpc": "2.0", - "id": id, - "result": {} - })), - "initialize" => { - // Verify client protocol version for compatibility - let client_version = req - .get("params") - .and_then(|p| p.get("protocolVersion")) - .and_then(|v| v.as_str()) - .unwrap_or("unknown"); - let supported = ["2024-11-05"]; - if !supported.contains(&client_version) { - tracing::warn!( - "Client protocol version '{}' not in supported list {:?}; proceeding with 2024-11-05", - client_version, - supported - ); - } - Ok(serde_json::json!({ - "jsonrpc": "2.0", - "id": id, - "result": { - "protocolVersion": "2024-11-05", - "capabilities": { - "tools": {} - }, - "serverInfo": { - "name": "devbase", - "version": env!("CARGO_PKG_VERSION") - } - } - })) 
- } - "tools/list" => { - let tools: Vec = self - .tools - .values() - .map(|t| { - let schema = t.schema(); - serde_json::json!({ - "name": t.name(), - "description": schema.get("description").and_then(|v| v.as_str()).unwrap_or(""), - "inputSchema": schema.get("inputSchema").cloned().unwrap_or(serde_json::json!({})) - }) - }) - .collect(); - Ok(serde_json::json!({ - "jsonrpc": "2.0", - "id": id, - "result": { "tools": tools } - })) - } - "tools/call" => { - let params = req.get("params").cloned().unwrap_or(serde_json::Value::Null); - let name = params.get("name").and_then(|v| v.as_str()).unwrap_or(""); - let args = params.get("arguments").cloned().unwrap_or(serde_json::Value::Null); - let stream = params.get("stream").and_then(|v| v.as_bool()).unwrap_or(false); - - match self.tools.get(name) { - Some(_tool) if stream => { - let start = std::time::Instant::now(); - match self.handle_streaming_call(name, args, ctx).await { - Ok(events) => { - append_mcp_oplog(name, start.elapsed().as_millis(), true, None); - let events_json = serde_json::to_string(&events)?; - let content = serde_json::json!({ - "type": "text", - "text": events_json - }); - Ok(serde_json::json!({ - "jsonrpc": "2.0", - "id": id, - "result": { - "content": [content], - "isError": false - } - })) - } - Err(e) => { - append_mcp_oplog( - name, - start.elapsed().as_millis(), - false, - Some("invoke_error"), - ); - let payload = - serde_json::json!({ "success": false, "error": e.to_string() }); - let text = serde_json::to_string(&payload)?; - let content = serde_json::json!({ "type": "text", "text": text }); - Ok(serde_json::json!({ - "jsonrpc": "2.0", - "id": id, - "result": { - "content": [content], - "isError": true - } - })) - } - } - } - Some(tool) => { - let start = std::time::Instant::now(); - match tool.invoke(args, ctx).await { - Ok(result) => { - let text = result.to_string(); - let is_error = !result - .get("success") - .and_then(|v: &serde_json::Value| v.as_bool()) - .unwrap_or(true); - 
append_mcp_oplog( - name, - start.elapsed().as_millis(), - !is_error, - None, - ); - let content = serde_json::json!({ - "type": "text", - "text": text - }); - Ok(serde_json::json!({ - "jsonrpc": "2.0", - "id": id, - "result": { - "content": [content], - "isError": is_error - } - })) - } - Err(e) => { - append_mcp_oplog( - name, - start.elapsed().as_millis(), - false, - Some("invoke_error"), - ); - let payload = - serde_json::json!({ "success": false, "error": e.to_string() }); - let text = serde_json::to_string(&payload)?; - let content = serde_json::json!({ "type": "text", "text": text }); - Ok(serde_json::json!({ - "jsonrpc": "2.0", - "id": id, - "result": { - "content": [content], - "isError": true - } - })) - } - } - } - None => Ok(serde_json::json!({ - "jsonrpc": "2.0", - "id": id, - "error": { - "code": -32602, - "message": format!("Tool '{}' not found", name) - } - })), - } - } - _ => { - if id.is_null() { - // Workaround: Python MCP SDK 1.16.0 cannot parse JSON-RPC - // error responses with `id: null`. Return Null so the - // caller can silently drop it. - return Ok(serde_json::Value::Null); - } - Ok(serde_json::json!({ - "jsonrpc": "2.0", - "id": id, - "error": { - "code": -32601, - "message": format!("Method '{}' not found", method) - } - })) - } - } - } - - /// Invoke a tool in streaming mode and return a sequence of events. - /// - /// This is used by the SSE transport to push progressive updates. - /// If the tool does not override `invoke_stream`, the default implementation - /// delegates to `invoke` and wraps the result as a single `Done` event. - pub async fn handle_streaming_call( - &self, - name: &str, - args: serde_json::Value, - ctx: &mut crate::storage::AppContext, - ) -> anyhow::Result> { - match self.tools.get(name) { - Some(tool) => tool.invoke_stream(args, ctx).await, - None => Err(anyhow::anyhow!("Tool '{}' not found", name)), - } - } -} - -/// Build an MCP server with optional tier filtering. 
-/// -/// If `tiers` is `None`, all 69 tools are registered (backward compatible). -/// If `tiers` is provided, only tools whose tier is in the set are registered. -pub fn build_server_with_tiers(tiers: Option<&HashSet>) -> McpServer { - let mut server = McpServer::new(); - let all_tools = [ - McpToolEnum::Scan(DevkitScanTool), - McpToolEnum::Health(DevkitHealthTool), - McpToolEnum::Sync(DevkitSyncTool), - McpToolEnum::Query(DevkitQueryTool), - McpToolEnum::QueryRepos(DevkitQueryReposTool), - McpToolEnum::Index(DevkitIndexTool), - McpToolEnum::IndexHealth(DevkitIndexHealthTool), - McpToolEnum::IndexStream(DevkitIndexStreamTool), - McpToolEnum::Status(DevkitStatusTool), - McpToolEnum::Note(DevkitNoteTool), - McpToolEnum::Digest(DevkitDigestTool), - McpToolEnum::Paper(DevkitPaperIndexTool), - McpToolEnum::Experiment(DevkitExperimentLogTool), - McpToolEnum::GithubInfo(DevkitGithubInfoTool), - McpToolEnum::CodeMetrics(DevkitCodeMetricsTool), - McpToolEnum::ModuleGraph(DevkitModuleGraphTool), - McpToolEnum::NaturalLanguageQuery(DevkitNaturalLanguageQueryTool), - McpToolEnum::VaultSearch(DevkitVaultSearchTool), - McpToolEnum::VaultRead(DevkitVaultReadTool), - McpToolEnum::VaultWrite(DevkitVaultWriteTool), - McpToolEnum::VaultBacklinks(DevkitVaultBacklinksTool), - McpToolEnum::VaultDaily(DevkitVaultDailyTool), - McpToolEnum::VaultGraph(DevkitVaultGraphTool), - McpToolEnum::VaultExport(DevkitVaultExportTool), - McpToolEnum::VaultHistory(DevkitVaultHistoryTool), - McpToolEnum::ProjectContext(DevkitProjectContextTool), - McpToolEnum::ProjectBrief(DevkitProjectBriefTool), - McpToolEnum::ImpactAnalysis(DevkitImpactAnalysisTool), - McpToolEnum::CodeSymbols(DevkitCodeSymbolsTool), - McpToolEnum::DependencyGraph(DevkitDependencyGraphTool), - McpToolEnum::CallGraph(DevkitCallGraphTool), - McpToolEnum::DeadCode(DevkitDeadCodeTool), - McpToolEnum::SemanticSearch(DevkitSemanticSearchTool), - McpToolEnum::ArxivFetch(DevkitArxivFetchTool), - 
McpToolEnum::EmbeddingStore(DevkitEmbeddingStoreTool), - McpToolEnum::EmbeddingSearch(DevkitEmbeddingSearchTool), - McpToolEnum::CrossRepoSearch(DevkitCrossRepoSearchTool), - McpToolEnum::KnowledgeReport(DevkitKnowledgeReportTool), - McpToolEnum::RelatedSymbols(DevkitRelatedSymbolsTool), - McpToolEnum::HybridSearch(DevkitHybridSearchTool), - McpToolEnum::SearchQuality(DevkitSearchQualityTool), - McpToolEnum::SkillList(DevkitSkillListTool), - McpToolEnum::SkillSearch(DevkitSkillSearchTool), - McpToolEnum::SkillRun(DevkitSkillRunTool), - McpToolEnum::SkillDiscover(DevkitSkillDiscoverTool), - McpToolEnum::SkillSync(DevkitSkillSyncTool), - McpToolEnum::KnownLimitStore(DevkitKnownLimitStoreTool), - McpToolEnum::KnownLimitList(DevkitKnownLimitListTool), - McpToolEnum::RelationStore(DevkitRelationStoreTool), - McpToolEnum::RelationQuery(DevkitRelationQueryTool), - McpToolEnum::RelationDelete(DevkitRelationDeleteTool), - McpToolEnum::SessionSave(DevkitSessionSaveTool), - McpToolEnum::SessionList(DevkitSessionListTool), - McpToolEnum::SessionResume(DevkitSessionResumeTool), - McpToolEnum::SessionAttach(DevkitSessionAttachTool), - McpToolEnum::SessionDetach(DevkitSessionDetachTool), - McpToolEnum::SessionActivate(DevkitSessionActivateTool), - McpToolEnum::SessionSearch(DevkitSessionSearchTool), - McpToolEnum::SessionCapture(DevkitSessionCaptureTool), - McpToolEnum::SessionWorkflows(DevkitSessionWorkflowsTool), - McpToolEnum::SessionRecall(DevkitSessionRecallTool), - McpToolEnum::SessionIndex(DevkitSessionIndexTool), - McpToolEnum::SessionExport(DevkitSessionExportTool), - McpToolEnum::SessionImport(DevkitSessionImportTool), - McpToolEnum::WorkflowList(DevkitWorkflowListTool), - McpToolEnum::WorkflowRun(DevkitWorkflowRunTool), - McpToolEnum::WorkflowStatus(DevkitWorkflowStatusTool), - McpToolEnum::OplogQuery(DevkitOplogQueryTool), - McpToolEnum::Evaluate(DevkitEvaluateTool), - McpToolEnum::DocumentConvert(DevkitDocumentConvertTool), - ]; - for tool in all_tools { - if let 
Some(allowed) = tiers - && !allowed.contains(&tool.tier()) - { - continue; - } - server.register_tool(tool); - } - server -} - -/// Build an MCP server with all tools (backward compatible). -pub fn build_server() -> McpServer { - build_server_with_tiers(None) -} - -pub fn format_mcp_message(body: &serde_json::Value) -> String { - format_mcp_message_auto(body, false) -} - -/// Format MCP message with optional NDJSON mode (no Content-Length headers). -/// NDJSON mode outputs raw JSON followed by a newline, for clients that -/// expect line-delimited JSON-RPC over stdio. -pub fn format_mcp_message_auto(body: &serde_json::Value, ndjson: bool) -> String { - let body_str = body.to_string(); - if ndjson { - format!("{}\n", body_str) - } else { - format!("Content-Length: {}\r\n\r\n{}", body_str.len(), body_str) - } -} - -/// Check whether destructive MCP tools are enabled via environment variable. -/// Returns Ok(()) if enabled, or an error with a clear message if disabled. -pub(crate) fn check_destructive_enabled() -> anyhow::Result<()> { - let enabled = std::env::var("DEVBASE_MCP_ENABLE_DESTRUCTIVE") - .map(|v| v == "1" || v.eq_ignore_ascii_case("true")) - .unwrap_or(false); - if !enabled { - anyhow::bail!( - "Destructive tools are disabled. \ - Set DEVBASE_MCP_ENABLE_DESTRUCTIVE=1 to enable." - ); - } - Ok(()) -} - -/// Parse tool tiers from a comma-separated string (e.g. "stable,beta"). 
-fn parse_tool_tiers(s: &str) -> HashSet { - s.split(',') - .map(|t| t.trim()) - .filter(|t| !t.is_empty()) - .filter_map(|s| s.parse().ok()) - .collect() -} - -pub async fn run_stdio() -> anyhow::Result<()> { - let mut ctx = crate::storage::AppContext::with_defaults()?; - let tiers: Option> = std::env::var("DEVBASE_MCP_TOOL_TIERS") - .ok() - .map(|s| parse_tool_tiers(&s)) - .filter(|set| !set.is_empty()); - let server = build_server_with_tiers(tiers.as_ref()); - let stdin = tokio::io::stdin(); - let mut stdout = tokio::io::stdout(); - let mut reader = BufReader::new(stdin); - let mut line_buf = String::new(); - let mut use_ndjson = false; - - loop { - line_buf.clear(); - // Read header line to get Content-Length - let n = reader.read_line(&mut line_buf).await?; - if n == 0 { - break; // EOF - } - let line = line_buf.trim(); - if line.is_empty() { - continue; - } - - let content_length = if line.starts_with("Content-Length: ") { - line.strip_prefix("Content-Length: ").and_then(|v| v.parse::().ok()) - } else { - // Client is using NDJSON (raw JSON lines). Switch to NDJSON output. 
- use_ndjson = true; - // Fallback: parse raw JSON line for backward compatibility - let req: serde_json::Value = match serde_json::from_str(line) { - Ok(v) => v, - Err(e) => { - let resp = serde_json::json!({ - "jsonrpc": "2.0", - "id": null, - "error": { - "code": -32700, - "message": format!("Parse error: {}", e) - } - }); - let msg = format_mcp_message_auto(&resp, use_ndjson); - if stdout.write_all(msg.as_bytes()).await.is_err() - || stdout.flush().await.is_err() - { - break; - } - continue; - } - }; - let resp = server.handle_request(req, &mut ctx).await.unwrap_or_else(|e| { - serde_json::json!({ - "jsonrpc": "2.0", - "id": null, - "error": { - "code": -32603, - "message": format!("Internal error: {}", e) - } - }) - }); - if !resp.is_null() { - let msg = format_mcp_message_auto(&resp, use_ndjson); - if stdout.write_all(msg.as_bytes()).await.is_err() || stdout.flush().await.is_err() - { - break; - } - } - continue; - }; - - let content_length = match content_length { - Some(len) => len, - None => { - let resp = serde_json::json!({ - "jsonrpc": "2.0", - "id": null, - "error": { - "code": -32700, - "message": format!("Invalid Content-Length header: {}", line) - } - }); - let msg = format_mcp_message_auto(&resp, use_ndjson); - if stdout.write_all(msg.as_bytes()).await.is_err() || stdout.flush().await.is_err() - { - break; - } - continue; - } - }; - - // Read the empty line (\r\n or \n) - line_buf.clear(); - let _ = reader.read_line(&mut line_buf).await; - - // Read the exact number of bytes - let mut body_buf = vec![0u8; content_length]; - if let Err(e) = reader.read_exact(&mut body_buf).await { - let resp = serde_json::json!({ - "jsonrpc": "2.0", - "id": null, - "error": { - "code": -32700, - "message": format!("Failed to read request body: {}", e) - } - }); - let msg = format_mcp_message_auto(&resp, use_ndjson); - if stdout.write_all(msg.as_bytes()).await.is_err() || stdout.flush().await.is_err() { - break; - } - continue; - } - - let req: serde_json::Value = 
match String::from_utf8(body_buf) { - Ok(body) => match serde_json::from_str(&body) { - Ok(v) => v, - Err(e) => { - let resp = serde_json::json!({ - "jsonrpc": "2.0", - "id": null, - "error": { - "code": -32700, - "message": format!("Parse error: {}", e) - } - }); - let msg = format_mcp_message_auto(&resp, use_ndjson); - if stdout.write_all(msg.as_bytes()).await.is_err() - || stdout.flush().await.is_err() - { - break; // broken pipe - } - continue; - } - }, - Err(e) => { - let resp = serde_json::json!({ - "jsonrpc": "2.0", - "id": null, - "error": { - "code": -32700, - "message": format!("Invalid UTF-8: {}", e) - } - }); - let msg = format_mcp_message_auto(&resp, use_ndjson); - if stdout.write_all(msg.as_bytes()).await.is_err() || stdout.flush().await.is_err() - { - break; // broken pipe - } - continue; - } - }; - - // Notifications have no "id" field and require no response. - let is_notification = req.get("id").is_none(); - if is_notification { - // Silently acknowledge all notifications (not just notifications/*). - continue; - } - - let resp = server.handle_request(req, &mut ctx).await.unwrap_or_else(|e| { - serde_json::json!({ - "jsonrpc": "2.0", - "id": null, - "error": { - "code": -32603, - "message": format!("Internal error: {}", e) - } - }) - }); - - if !resp.is_null() { - let msg = format_mcp_message_auto(&resp, use_ndjson); - if stdout.write_all(msg.as_bytes()).await.is_err() || stdout.flush().await.is_err() { - break; // broken pipe - } - } - } - - Ok(()) -} - -pub use crate::clients::*; -#[cfg(test)] -pub mod tests; -pub mod tools; +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 +use std::collections::{HashMap, HashSet}; +use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader}; + +pub use tools::*; + +/// Phase of a streaming tool invocation. +#[derive(Debug, Clone, serde::Serialize)] +#[serde(rename_all = "snake_case")] +pub enum StreamPhase { + /// Progress update (e.g., "Indexing repo 3/10..."). 
+ Progress, + /// Intermediate partial result. + Partial, + /// Final result — stream ends after this. + Done, + /// Error occurred — stream ends after this. + Error, +} + +/// A single event in a streaming tool invocation. +#[derive(Debug, Clone, serde::Serialize)] +pub struct ToolStreamEvent { + pub phase: StreamPhase, + pub payload: serde_json::Value, +} + +#[allow(async_fn_in_trait)] +pub trait McpTool: Send + Sync + Clone { + fn name(&self) -> &'static str; + fn schema(&self) -> serde_json::Value; + async fn invoke( + &self, + args: serde_json::Value, + ctx: &mut crate::storage::AppContext, + ) -> anyhow::Result; + + /// Optional streaming interface for long-running operations. + /// + /// Default implementation delegates to `invoke` and emits a single `Done` event. + /// Override this for tools that support progressive output (e.g., indexing, + /// syncing large batches, or long-running analysis). + async fn invoke_stream( + &self, + args: serde_json::Value, + ctx: &mut crate::storage::AppContext, + ) -> anyhow::Result> { + let result = self.invoke(args, ctx).await?; + Ok(vec![ToolStreamEvent { + phase: StreamPhase::Done, + payload: result, + }]) + } +} + +#[derive(Clone)] +pub enum McpToolEnum { + Scan(DevkitScanTool), + Health(DevkitHealthTool), + Sync(DevkitSyncTool), + Query(DevkitQueryTool), + QueryRepos(DevkitQueryReposTool), + Index(DevkitIndexTool), + IndexHealth(DevkitIndexHealthTool), + IndexStream(DevkitIndexStreamTool), + Note(DevkitNoteTool), + Status(DevkitStatusTool), + Digest(DevkitDigestTool), + Paper(DevkitPaperIndexTool), + Experiment(DevkitExperimentLogTool), + GithubInfo(DevkitGithubInfoTool), + CodeMetrics(DevkitCodeMetricsTool), + ModuleGraph(DevkitModuleGraphTool), + NaturalLanguageQuery(DevkitNaturalLanguageQueryTool), + VaultSearch(DevkitVaultSearchTool), + VaultRead(DevkitVaultReadTool), + VaultWrite(DevkitVaultWriteTool), + VaultBacklinks(DevkitVaultBacklinksTool), + VaultDaily(DevkitVaultDailyTool), + 
VaultGraph(DevkitVaultGraphTool), + VaultExport(DevkitVaultExportTool), + VaultHistory(DevkitVaultHistoryTool), + ProjectContext(DevkitProjectContextTool), + ProjectBrief(DevkitProjectBriefTool), + ImpactAnalysis(DevkitImpactAnalysisTool), + CodeSymbols(DevkitCodeSymbolsTool), + DependencyGraph(DevkitDependencyGraphTool), + CallGraph(DevkitCallGraphTool), + DeadCode(DevkitDeadCodeTool), + SemanticSearch(DevkitSemanticSearchTool), + ArxivFetch(DevkitArxivFetchTool), + EmbeddingStore(DevkitEmbeddingStoreTool), + EmbeddingSearch(DevkitEmbeddingSearchTool), + CrossRepoSearch(DevkitCrossRepoSearchTool), + KnowledgeReport(DevkitKnowledgeReportTool), + RelatedSymbols(DevkitRelatedSymbolsTool), + HybridSearch(DevkitHybridSearchTool), + SearchQuality(DevkitSearchQualityTool), + SkillList(DevkitSkillListTool), + SkillSearch(DevkitSkillSearchTool), + SkillRun(DevkitSkillRunTool), + SkillDiscover(DevkitSkillDiscoverTool), + SkillSync(DevkitSkillSyncTool), + KnownLimitStore(DevkitKnownLimitStoreTool), + KnownLimitList(DevkitKnownLimitListTool), + RelationStore(DevkitRelationStoreTool), + RelationQuery(DevkitRelationQueryTool), + RelationDelete(DevkitRelationDeleteTool), + SessionSave(DevkitSessionSaveTool), + SessionList(DevkitSessionListTool), + SessionResume(DevkitSessionResumeTool), + SessionAttach(DevkitSessionAttachTool), + SessionDetach(DevkitSessionDetachTool), + SessionActivate(DevkitSessionActivateTool), + SessionSearch(DevkitSessionSearchTool), + SessionCapture(DevkitSessionCaptureTool), + SessionWorkflows(DevkitSessionWorkflowsTool), + SessionRecall(DevkitSessionRecallTool), + SessionIndex(DevkitSessionIndexTool), + SessionExport(DevkitSessionExportTool), + SessionImport(DevkitSessionImportTool), + WorkflowList(DevkitWorkflowListTool), + WorkflowRun(DevkitWorkflowRunTool), + WorkflowStatus(DevkitWorkflowStatusTool), + OplogQuery(DevkitOplogQueryTool), + Evaluate(DevkitEvaluateTool), + DocumentConvert(DevkitDocumentConvertTool), + 
OntologyImport(DevkitOntologyImportTool), +} + +/// Stability tier for MCP tools. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum ToolTier { + Stable, + Beta, + Experimental, +} + +impl std::str::FromStr for ToolTier { + type Err = (); + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "stable" => Ok(ToolTier::Stable), + "beta" => Ok(ToolTier::Beta), + "experimental" => Ok(ToolTier::Experimental), + _ => Err(()), + } + } +} + +impl McpToolEnum { + pub fn tier(&self) -> ToolTier { + match self { + // Stable: battle-tested, schema frozen, unit-tested + McpToolEnum::Health(_) => ToolTier::Stable, + McpToolEnum::QueryRepos(_) => ToolTier::Stable, + McpToolEnum::VaultSearch(_) => ToolTier::Stable, + McpToolEnum::VaultRead(_) => ToolTier::Stable, + McpToolEnum::ProjectContext(_) => ToolTier::Stable, + McpToolEnum::ProjectBrief(_) => ToolTier::Beta, + McpToolEnum::ImpactAnalysis(_) => ToolTier::Beta, + // Beta: validated but schema may微调, limited edge-case tests + McpToolEnum::Scan(_) => ToolTier::Beta, + McpToolEnum::Sync(_) => ToolTier::Beta, + McpToolEnum::Query(_) => ToolTier::Beta, + McpToolEnum::Index(_) => ToolTier::Beta, + McpToolEnum::IndexHealth(_) => ToolTier::Beta, + McpToolEnum::IndexStream(_) => ToolTier::Beta, + McpToolEnum::Status(_) => ToolTier::Beta, + McpToolEnum::Note(_) => ToolTier::Beta, + McpToolEnum::VaultWrite(_) => ToolTier::Beta, + McpToolEnum::VaultBacklinks(_) => ToolTier::Beta, + McpToolEnum::VaultDaily(_) => ToolTier::Beta, + McpToolEnum::VaultGraph(_) => ToolTier::Beta, + McpToolEnum::VaultExport(_) => ToolTier::Beta, + McpToolEnum::VaultHistory(_) => ToolTier::Beta, + McpToolEnum::NaturalLanguageQuery(_) => ToolTier::Beta, + McpToolEnum::GithubInfo(_) => ToolTier::Beta, + // Experimental: new, behavior may change, pending prod validation + McpToolEnum::Digest(_) => ToolTier::Experimental, + McpToolEnum::Paper(_) => ToolTier::Experimental, + McpToolEnum::Experiment(_) => ToolTier::Beta, + 
McpToolEnum::CodeMetrics(_) => ToolTier::Beta, + McpToolEnum::ModuleGraph(_) => ToolTier::Beta, + McpToolEnum::CodeSymbols(_) => ToolTier::Beta, + McpToolEnum::DependencyGraph(_) => ToolTier::Beta, + McpToolEnum::CallGraph(_) => ToolTier::Beta, + McpToolEnum::DeadCode(_) => ToolTier::Beta, + McpToolEnum::SemanticSearch(_) => ToolTier::Beta, + McpToolEnum::ArxivFetch(_) => ToolTier::Beta, + McpToolEnum::EmbeddingStore(_) => ToolTier::Beta, + McpToolEnum::EmbeddingSearch(_) => ToolTier::Beta, + McpToolEnum::CrossRepoSearch(_) => ToolTier::Beta, + McpToolEnum::KnowledgeReport(_) => ToolTier::Beta, + McpToolEnum::RelatedSymbols(_) => ToolTier::Experimental, + McpToolEnum::HybridSearch(_) => ToolTier::Beta, + McpToolEnum::SearchQuality(_) => ToolTier::Beta, + McpToolEnum::SkillList(_) => ToolTier::Beta, + McpToolEnum::SkillSearch(_) => ToolTier::Beta, + McpToolEnum::SkillRun(_) => ToolTier::Beta, + McpToolEnum::SkillDiscover(_) => ToolTier::Beta, + McpToolEnum::SkillSync(_) => ToolTier::Beta, + McpToolEnum::KnownLimitStore(_) => ToolTier::Beta, + McpToolEnum::KnownLimitList(_) => ToolTier::Beta, + McpToolEnum::RelationStore(_) => ToolTier::Beta, + McpToolEnum::RelationQuery(_) => ToolTier::Beta, + McpToolEnum::RelationDelete(_) => ToolTier::Beta, + McpToolEnum::SessionSave(_) => ToolTier::Beta, + McpToolEnum::SessionList(_) => ToolTier::Beta, + McpToolEnum::SessionResume(_) => ToolTier::Beta, + McpToolEnum::SessionAttach(_) => ToolTier::Beta, + McpToolEnum::SessionDetach(_) => ToolTier::Beta, + McpToolEnum::SessionActivate(_) => ToolTier::Beta, + McpToolEnum::SessionSearch(_) => ToolTier::Beta, + McpToolEnum::SessionCapture(_) => ToolTier::Beta, + McpToolEnum::SessionWorkflows(_) => ToolTier::Beta, + McpToolEnum::SessionRecall(_) => ToolTier::Experimental, + McpToolEnum::SessionIndex(_) => ToolTier::Experimental, + McpToolEnum::SessionExport(_) => ToolTier::Experimental, + McpToolEnum::SessionImport(_) => ToolTier::Experimental, + McpToolEnum::WorkflowList(_) => 
ToolTier::Beta, + McpToolEnum::WorkflowRun(_) => ToolTier::Beta, + McpToolEnum::WorkflowStatus(_) => ToolTier::Beta, + McpToolEnum::OplogQuery(_) => ToolTier::Beta, + McpToolEnum::Evaluate(_) => ToolTier::Beta, + McpToolEnum::DocumentConvert(_) => ToolTier::Experimental, + McpToolEnum::OntologyImport(_) => ToolTier::Beta, + } + } +} + +impl McpTool for McpToolEnum { + fn name(&self) -> &'static str { + match self { + McpToolEnum::Scan(t) => t.name(), + McpToolEnum::Health(t) => t.name(), + McpToolEnum::Sync(t) => t.name(), + McpToolEnum::Query(t) => t.name(), + McpToolEnum::QueryRepos(t) => t.name(), + McpToolEnum::Index(t) => t.name(), + McpToolEnum::IndexHealth(t) => t.name(), + McpToolEnum::IndexStream(t) => t.name(), + McpToolEnum::Status(t) => t.name(), + McpToolEnum::Note(t) => t.name(), + McpToolEnum::Digest(t) => t.name(), + McpToolEnum::Paper(t) => t.name(), + McpToolEnum::Experiment(t) => t.name(), + McpToolEnum::GithubInfo(t) => t.name(), + McpToolEnum::CodeMetrics(t) => t.name(), + McpToolEnum::ModuleGraph(t) => t.name(), + McpToolEnum::NaturalLanguageQuery(t) => t.name(), + McpToolEnum::VaultSearch(t) => t.name(), + McpToolEnum::VaultRead(t) => t.name(), + McpToolEnum::VaultWrite(t) => t.name(), + McpToolEnum::VaultBacklinks(t) => t.name(), + McpToolEnum::VaultDaily(t) => t.name(), + McpToolEnum::VaultGraph(t) => t.name(), + McpToolEnum::VaultExport(t) => t.name(), + McpToolEnum::VaultHistory(t) => t.name(), + McpToolEnum::ProjectContext(t) => t.name(), + McpToolEnum::ProjectBrief(t) => t.name(), + McpToolEnum::ImpactAnalysis(t) => t.name(), + McpToolEnum::CodeSymbols(t) => t.name(), + McpToolEnum::DependencyGraph(t) => t.name(), + McpToolEnum::CallGraph(t) => t.name(), + McpToolEnum::DeadCode(t) => t.name(), + McpToolEnum::SemanticSearch(t) => t.name(), + McpToolEnum::ArxivFetch(t) => t.name(), + McpToolEnum::EmbeddingStore(t) => t.name(), + McpToolEnum::EmbeddingSearch(t) => t.name(), + McpToolEnum::CrossRepoSearch(t) => t.name(), + 
McpToolEnum::KnowledgeReport(t) => t.name(), + McpToolEnum::RelatedSymbols(t) => t.name(), + McpToolEnum::HybridSearch(t) => t.name(), + McpToolEnum::SearchQuality(t) => t.name(), + McpToolEnum::SkillList(t) => t.name(), + McpToolEnum::SkillSearch(t) => t.name(), + McpToolEnum::SkillRun(t) => t.name(), + McpToolEnum::SkillDiscover(t) => t.name(), + McpToolEnum::SkillSync(t) => t.name(), + McpToolEnum::KnownLimitStore(t) => t.name(), + McpToolEnum::KnownLimitList(t) => t.name(), + McpToolEnum::RelationStore(t) => t.name(), + McpToolEnum::RelationQuery(t) => t.name(), + McpToolEnum::RelationDelete(t) => t.name(), + McpToolEnum::SessionSave(t) => t.name(), + McpToolEnum::SessionList(t) => t.name(), + McpToolEnum::SessionResume(t) => t.name(), + McpToolEnum::SessionAttach(t) => t.name(), + McpToolEnum::SessionDetach(t) => t.name(), + McpToolEnum::SessionActivate(t) => t.name(), + McpToolEnum::SessionSearch(t) => t.name(), + McpToolEnum::SessionCapture(t) => t.name(), + McpToolEnum::SessionWorkflows(t) => t.name(), + McpToolEnum::SessionRecall(t) => t.name(), + McpToolEnum::SessionIndex(t) => t.name(), + McpToolEnum::SessionExport(t) => t.name(), + McpToolEnum::SessionImport(t) => t.name(), + McpToolEnum::WorkflowList(t) => t.name(), + McpToolEnum::WorkflowRun(t) => t.name(), + McpToolEnum::WorkflowStatus(t) => t.name(), + McpToolEnum::OplogQuery(t) => t.name(), + McpToolEnum::Evaluate(t) => t.name(), + McpToolEnum::DocumentConvert(t) => t.name(), + McpToolEnum::OntologyImport(t) => t.name(), + } + } + + fn schema(&self) -> serde_json::Value { + match self { + McpToolEnum::Scan(t) => t.schema(), + McpToolEnum::Health(t) => t.schema(), + McpToolEnum::Sync(t) => t.schema(), + McpToolEnum::Query(t) => t.schema(), + McpToolEnum::QueryRepos(t) => t.schema(), + McpToolEnum::Index(t) => t.schema(), + McpToolEnum::IndexHealth(t) => t.schema(), + McpToolEnum::IndexStream(t) => t.schema(), + McpToolEnum::Status(t) => t.schema(), + McpToolEnum::Note(t) => t.schema(), + 
McpToolEnum::Digest(t) => t.schema(), + McpToolEnum::Paper(t) => t.schema(), + McpToolEnum::Experiment(t) => t.schema(), + McpToolEnum::GithubInfo(t) => t.schema(), + McpToolEnum::CodeMetrics(t) => t.schema(), + McpToolEnum::ModuleGraph(t) => t.schema(), + McpToolEnum::NaturalLanguageQuery(t) => t.schema(), + McpToolEnum::VaultSearch(t) => t.schema(), + McpToolEnum::VaultRead(t) => t.schema(), + McpToolEnum::VaultWrite(t) => t.schema(), + McpToolEnum::VaultBacklinks(t) => t.schema(), + McpToolEnum::VaultDaily(t) => t.schema(), + McpToolEnum::VaultGraph(t) => t.schema(), + McpToolEnum::VaultExport(t) => t.schema(), + McpToolEnum::VaultHistory(t) => t.schema(), + McpToolEnum::ProjectContext(t) => t.schema(), + McpToolEnum::ProjectBrief(t) => t.schema(), + McpToolEnum::ImpactAnalysis(t) => t.schema(), + McpToolEnum::CodeSymbols(t) => t.schema(), + McpToolEnum::DependencyGraph(t) => t.schema(), + McpToolEnum::CallGraph(t) => t.schema(), + McpToolEnum::DeadCode(t) => t.schema(), + McpToolEnum::SemanticSearch(t) => t.schema(), + McpToolEnum::ArxivFetch(t) => t.schema(), + McpToolEnum::EmbeddingStore(t) => t.schema(), + McpToolEnum::EmbeddingSearch(t) => t.schema(), + McpToolEnum::CrossRepoSearch(t) => t.schema(), + McpToolEnum::KnowledgeReport(t) => t.schema(), + McpToolEnum::RelatedSymbols(t) => t.schema(), + McpToolEnum::HybridSearch(t) => t.schema(), + McpToolEnum::SearchQuality(t) => t.schema(), + McpToolEnum::SkillList(t) => t.schema(), + McpToolEnum::SkillSearch(t) => t.schema(), + McpToolEnum::SkillRun(t) => t.schema(), + McpToolEnum::SkillDiscover(t) => t.schema(), + McpToolEnum::SkillSync(t) => t.schema(), + McpToolEnum::KnownLimitStore(t) => t.schema(), + McpToolEnum::KnownLimitList(t) => t.schema(), + McpToolEnum::RelationStore(t) => t.schema(), + McpToolEnum::RelationQuery(t) => t.schema(), + McpToolEnum::RelationDelete(t) => t.schema(), + McpToolEnum::SessionSave(t) => t.schema(), + McpToolEnum::SessionList(t) => t.schema(), + McpToolEnum::SessionResume(t) 
=> t.schema(), + McpToolEnum::SessionAttach(t) => t.schema(), + McpToolEnum::SessionDetach(t) => t.schema(), + McpToolEnum::SessionActivate(t) => t.schema(), + McpToolEnum::SessionSearch(t) => t.schema(), + McpToolEnum::SessionCapture(t) => t.schema(), + McpToolEnum::SessionWorkflows(t) => t.schema(), + McpToolEnum::SessionRecall(t) => t.schema(), + McpToolEnum::SessionIndex(t) => t.schema(), + McpToolEnum::SessionExport(t) => t.schema(), + McpToolEnum::SessionImport(t) => t.schema(), + McpToolEnum::WorkflowList(t) => t.schema(), + McpToolEnum::WorkflowRun(t) => t.schema(), + McpToolEnum::WorkflowStatus(t) => t.schema(), + McpToolEnum::OplogQuery(t) => t.schema(), + McpToolEnum::Evaluate(t) => t.schema(), + McpToolEnum::DocumentConvert(t) => t.schema(), + McpToolEnum::OntologyImport(t) => t.schema(), + } + } + + async fn invoke( + &self, + args: serde_json::Value, + ctx: &mut crate::storage::AppContext, + ) -> anyhow::Result { + match self { + McpToolEnum::Scan(t) => t.invoke(args, ctx).await, + McpToolEnum::Health(t) => t.invoke(args, ctx).await, + McpToolEnum::Sync(t) => t.invoke(args, ctx).await, + McpToolEnum::Query(t) => t.invoke(args, ctx).await, + McpToolEnum::QueryRepos(t) => t.invoke(args, ctx).await, + McpToolEnum::Index(t) => t.invoke(args, ctx).await, + McpToolEnum::IndexHealth(t) => t.invoke(args, ctx).await, + McpToolEnum::IndexStream(t) => t.invoke(args, ctx).await, + McpToolEnum::Status(t) => t.invoke(args, ctx).await, + McpToolEnum::Note(t) => t.invoke(args, ctx).await, + McpToolEnum::Digest(t) => t.invoke(args, ctx).await, + McpToolEnum::Paper(t) => t.invoke(args, ctx).await, + McpToolEnum::Experiment(t) => t.invoke(args, ctx).await, + McpToolEnum::GithubInfo(t) => t.invoke(args, ctx).await, + McpToolEnum::CodeMetrics(t) => t.invoke(args, ctx).await, + McpToolEnum::ModuleGraph(t) => t.invoke(args, ctx).await, + McpToolEnum::NaturalLanguageQuery(t) => t.invoke(args, ctx).await, + McpToolEnum::VaultSearch(t) => t.invoke(args, ctx).await, + 
McpToolEnum::VaultRead(t) => t.invoke(args, ctx).await, + McpToolEnum::VaultWrite(t) => t.invoke(args, ctx).await, + McpToolEnum::VaultBacklinks(t) => t.invoke(args, ctx).await, + McpToolEnum::VaultDaily(t) => t.invoke(args, ctx).await, + McpToolEnum::VaultGraph(t) => t.invoke(args, ctx).await, + McpToolEnum::VaultExport(t) => t.invoke(args, ctx).await, + McpToolEnum::VaultHistory(t) => t.invoke(args, ctx).await, + McpToolEnum::ProjectContext(t) => t.invoke(args, ctx).await, + McpToolEnum::ProjectBrief(t) => t.invoke(args, ctx).await, + McpToolEnum::ImpactAnalysis(t) => t.invoke(args, ctx).await, + McpToolEnum::CodeSymbols(t) => t.invoke(args, ctx).await, + McpToolEnum::DependencyGraph(t) => t.invoke(args, ctx).await, + McpToolEnum::CallGraph(t) => t.invoke(args, ctx).await, + McpToolEnum::DeadCode(t) => t.invoke(args, ctx).await, + McpToolEnum::SemanticSearch(t) => t.invoke(args, ctx).await, + McpToolEnum::ArxivFetch(t) => t.invoke(args, ctx).await, + McpToolEnum::EmbeddingStore(t) => t.invoke(args, ctx).await, + McpToolEnum::EmbeddingSearch(t) => t.invoke(args, ctx).await, + McpToolEnum::CrossRepoSearch(t) => t.invoke(args, ctx).await, + McpToolEnum::KnowledgeReport(t) => t.invoke(args, ctx).await, + McpToolEnum::RelatedSymbols(t) => t.invoke(args, ctx).await, + McpToolEnum::HybridSearch(t) => t.invoke(args, ctx).await, + McpToolEnum::SearchQuality(t) => t.invoke(args, ctx).await, + McpToolEnum::SkillList(t) => t.invoke(args, ctx).await, + McpToolEnum::SkillSearch(t) => t.invoke(args, ctx).await, + McpToolEnum::SkillRun(t) => t.invoke(args, ctx).await, + McpToolEnum::SkillDiscover(t) => t.invoke(args, ctx).await, + McpToolEnum::SkillSync(t) => t.invoke(args, ctx).await, + McpToolEnum::KnownLimitStore(t) => t.invoke(args, ctx).await, + McpToolEnum::KnownLimitList(t) => t.invoke(args, ctx).await, + McpToolEnum::RelationStore(t) => t.invoke(args, ctx).await, + McpToolEnum::RelationQuery(t) => t.invoke(args, ctx).await, + McpToolEnum::RelationDelete(t) => 
t.invoke(args, ctx).await, + McpToolEnum::SessionSave(t) => t.invoke(args, ctx).await, + McpToolEnum::SessionList(t) => t.invoke(args, ctx).await, + McpToolEnum::SessionResume(t) => t.invoke(args, ctx).await, + McpToolEnum::SessionAttach(t) => t.invoke(args, ctx).await, + McpToolEnum::SessionDetach(t) => t.invoke(args, ctx).await, + McpToolEnum::SessionActivate(t) => t.invoke(args, ctx).await, + McpToolEnum::SessionSearch(t) => t.invoke(args, ctx).await, + McpToolEnum::SessionCapture(t) => t.invoke(args, ctx).await, + McpToolEnum::SessionWorkflows(t) => t.invoke(args, ctx).await, + McpToolEnum::SessionRecall(t) => t.invoke(args, ctx).await, + McpToolEnum::SessionIndex(t) => t.invoke(args, ctx).await, + McpToolEnum::SessionExport(t) => t.invoke(args, ctx).await, + McpToolEnum::SessionImport(t) => t.invoke(args, ctx).await, + McpToolEnum::WorkflowList(t) => t.invoke(args, ctx).await, + McpToolEnum::WorkflowRun(t) => t.invoke(args, ctx).await, + McpToolEnum::WorkflowStatus(t) => t.invoke(args, ctx).await, + McpToolEnum::OplogQuery(t) => t.invoke(args, ctx).await, + McpToolEnum::Evaluate(t) => t.invoke(args, ctx).await, + McpToolEnum::DocumentConvert(t) => t.invoke(args, ctx).await, + McpToolEnum::OntologyImport(t) => t.invoke(args, ctx).await, + } + } +} + +/// Long-lived oplog file handle — opened once, reused across all MCP calls. +static OPLOG_FILE: std::sync::OnceLock>> = + std::sync::OnceLock::new(); + +fn get_oplog_file() -> &'static std::sync::Mutex> { + OPLOG_FILE.get_or_init(|| { + let file = dirs::data_local_dir().and_then(|data_dir| { + let log_path = data_dir.join("devbase").join("mcp-oplog.ndjson"); + std::fs::OpenOptions::new().create(true).append(true).open(&log_path).ok() + }); + std::sync::Mutex::new(file) + }) +} + +/// Append a single MCP tool invocation record to the oplog file. 
+/// +/// Path: `<data_local_dir>/devbase/mcp-oplog.ndjson` (`%LOCALAPPDATA%` on Windows) +/// Format: newline-delimited JSON (NDJSON) +fn append_mcp_oplog(tool_name: &str, duration_ms: u128, success: bool, error_type: Option<&str>) { + let entry = serde_json::json!({ + "timestamp": chrono::Utc::now().to_rfc3339(), + "tool": tool_name, + "duration_ms": duration_ms, + "success": success, + "error_type": error_type, + }); + + if let Ok(mut guard) = get_oplog_file().lock() { + if let Some(ref mut file) = *guard { + use std::io::Write; + if let Err(e) = writeln!(file, "{}", entry) { + tracing::warn!("Failed to write MCP oplog: {}", e); + } + } + } +} + +pub struct McpServer { + tools: HashMap, +} + +impl Default for McpServer { + fn default() -> Self { + Self::new() + } +} + +impl McpServer { + pub fn new() -> Self { + Self { tools: HashMap::new() } + } + + pub fn register_tool(&mut self, tool: McpToolEnum) { + self.tools.insert(tool.name().to_string(), tool); + } + + pub async fn handle_request( + &self, + req: serde_json::Value, + ctx: &mut crate::storage::AppContext, + ) -> anyhow::Result { + let id = req.get("id").cloned().unwrap_or(serde_json::Value::Null); + let method = req.get("method").and_then(|v| v.as_str()).unwrap_or(""); + + match method { + "ping" => Ok(serde_json::json!({ + "jsonrpc": "2.0", + "id": id, + "result": {} + })), + "initialize" => { + // Verify client protocol version for compatibility + let client_version = req + .get("params") + .and_then(|p| p.get("protocolVersion")) + .and_then(|v| v.as_str()) + .unwrap_or("unknown"); + let supported = ["2024-11-05"]; + if !supported.contains(&client_version) { + tracing::warn!( + "Client protocol version '{}' not in supported list {:?}; proceeding with 2024-11-05", + client_version, + supported + ); + } + Ok(serde_json::json!({ + "jsonrpc": "2.0", + "id": id, + "result": { + "protocolVersion": "2024-11-05", + "capabilities": { + "tools": {} + }, + "serverInfo": { + "name": "devbase", + "version": env!("CARGO_PKG_VERSION") + } + } + })) 
+ } + "tools/list" => { + let tools: Vec = self + .tools + .values() + .map(|t| { + let schema = t.schema(); + serde_json::json!({ + "name": t.name(), + "description": schema.get("description").and_then(|v| v.as_str()).unwrap_or(""), + "inputSchema": schema.get("inputSchema").cloned().unwrap_or(serde_json::json!({})) + }) + }) + .collect(); + Ok(serde_json::json!({ + "jsonrpc": "2.0", + "id": id, + "result": { "tools": tools } + })) + } + "tools/call" => { + let params = req.get("params").cloned().unwrap_or(serde_json::Value::Null); + let name = params.get("name").and_then(|v| v.as_str()).unwrap_or(""); + let args = params.get("arguments").cloned().unwrap_or(serde_json::Value::Null); + let stream = params.get("stream").and_then(|v| v.as_bool()).unwrap_or(false); + + match self.tools.get(name) { + Some(_tool) if stream => { + let start = std::time::Instant::now(); + match self.handle_streaming_call(name, args, ctx).await { + Ok(events) => { + append_mcp_oplog(name, start.elapsed().as_millis(), true, None); + let events_json = serde_json::to_string(&events)?; + let content = serde_json::json!({ + "type": "text", + "text": events_json + }); + Ok(serde_json::json!({ + "jsonrpc": "2.0", + "id": id, + "result": { + "content": [content], + "isError": false + } + })) + } + Err(e) => { + append_mcp_oplog( + name, + start.elapsed().as_millis(), + false, + Some("invoke_error"), + ); + let payload = + serde_json::json!({ "success": false, "error": e.to_string() }); + let text = serde_json::to_string(&payload)?; + let content = serde_json::json!({ "type": "text", "text": text }); + Ok(serde_json::json!({ + "jsonrpc": "2.0", + "id": id, + "result": { + "content": [content], + "isError": true + } + })) + } + } + } + Some(tool) => { + let start = std::time::Instant::now(); + match tool.invoke(args, ctx).await { + Ok(result) => { + let text = result.to_string(); + let is_error = !result + .get("success") + .and_then(|v: &serde_json::Value| v.as_bool()) + .unwrap_or(true); + 
append_mcp_oplog( + name, + start.elapsed().as_millis(), + !is_error, + None, + ); + let content = serde_json::json!({ + "type": "text", + "text": text + }); + Ok(serde_json::json!({ + "jsonrpc": "2.0", + "id": id, + "result": { + "content": [content], + "isError": is_error + } + })) + } + Err(e) => { + append_mcp_oplog( + name, + start.elapsed().as_millis(), + false, + Some("invoke_error"), + ); + let payload = + serde_json::json!({ "success": false, "error": e.to_string() }); + let text = serde_json::to_string(&payload)?; + let content = serde_json::json!({ "type": "text", "text": text }); + Ok(serde_json::json!({ + "jsonrpc": "2.0", + "id": id, + "result": { + "content": [content], + "isError": true + } + })) + } + } + } + None => Ok(serde_json::json!({ + "jsonrpc": "2.0", + "id": id, + "error": { + "code": -32602, + "message": format!("Tool '{}' not found", name) + } + })), + } + } + _ => { + if id.is_null() { + // Workaround: Python MCP SDK 1.16.0 cannot parse JSON-RPC + // error responses with `id: null`. Return Null so the + // caller can silently drop it. + return Ok(serde_json::Value::Null); + } + Ok(serde_json::json!({ + "jsonrpc": "2.0", + "id": id, + "error": { + "code": -32601, + "message": format!("Method '{}' not found", method) + } + })) + } + } + } + + /// Invoke a tool in streaming mode and return a sequence of events. + /// + /// This is used by the SSE transport to push progressive updates. + /// If the tool does not override `invoke_stream`, the default implementation + /// delegates to `invoke` and wraps the result as a single `Done` event. + pub async fn handle_streaming_call( + &self, + name: &str, + args: serde_json::Value, + ctx: &mut crate::storage::AppContext, + ) -> anyhow::Result> { + match self.tools.get(name) { + Some(tool) => tool.invoke_stream(args, ctx).await, + None => Err(anyhow::anyhow!("Tool '{}' not found", name)), + } + } +} + +/// Build an MCP server with optional tier filtering. 
+/// +/// If `tiers` is `None`, all tools are registered (backward compatible). +/// If `tiers` is provided, only tools whose tier is in the set are registered. +pub fn build_server_with_tiers(tiers: Option<&HashSet>) -> McpServer { + let mut server = McpServer::new(); + let all_tools = [ + McpToolEnum::Scan(DevkitScanTool), + McpToolEnum::Health(DevkitHealthTool), + McpToolEnum::Sync(DevkitSyncTool), + McpToolEnum::Query(DevkitQueryTool), + McpToolEnum::QueryRepos(DevkitQueryReposTool), + McpToolEnum::Index(DevkitIndexTool), + McpToolEnum::IndexHealth(DevkitIndexHealthTool), + McpToolEnum::IndexStream(DevkitIndexStreamTool), + McpToolEnum::Status(DevkitStatusTool), + McpToolEnum::Note(DevkitNoteTool), + McpToolEnum::Digest(DevkitDigestTool), + McpToolEnum::Paper(DevkitPaperIndexTool), + McpToolEnum::Experiment(DevkitExperimentLogTool), + McpToolEnum::GithubInfo(DevkitGithubInfoTool), + McpToolEnum::CodeMetrics(DevkitCodeMetricsTool), + McpToolEnum::ModuleGraph(DevkitModuleGraphTool), + McpToolEnum::NaturalLanguageQuery(DevkitNaturalLanguageQueryTool), + McpToolEnum::VaultSearch(DevkitVaultSearchTool), + McpToolEnum::VaultRead(DevkitVaultReadTool), + McpToolEnum::VaultWrite(DevkitVaultWriteTool), + McpToolEnum::VaultBacklinks(DevkitVaultBacklinksTool), + McpToolEnum::VaultDaily(DevkitVaultDailyTool), + McpToolEnum::VaultGraph(DevkitVaultGraphTool), + McpToolEnum::VaultExport(DevkitVaultExportTool), + McpToolEnum::VaultHistory(DevkitVaultHistoryTool), + McpToolEnum::ProjectContext(DevkitProjectContextTool), + McpToolEnum::ProjectBrief(DevkitProjectBriefTool), + McpToolEnum::ImpactAnalysis(DevkitImpactAnalysisTool), + McpToolEnum::CodeSymbols(DevkitCodeSymbolsTool), + McpToolEnum::DependencyGraph(DevkitDependencyGraphTool), + McpToolEnum::CallGraph(DevkitCallGraphTool), + McpToolEnum::DeadCode(DevkitDeadCodeTool), + McpToolEnum::SemanticSearch(DevkitSemanticSearchTool), + McpToolEnum::ArxivFetch(DevkitArxivFetchTool), + 
McpToolEnum::EmbeddingStore(DevkitEmbeddingStoreTool), + McpToolEnum::EmbeddingSearch(DevkitEmbeddingSearchTool), + McpToolEnum::CrossRepoSearch(DevkitCrossRepoSearchTool), + McpToolEnum::KnowledgeReport(DevkitKnowledgeReportTool), + McpToolEnum::RelatedSymbols(DevkitRelatedSymbolsTool), + McpToolEnum::HybridSearch(DevkitHybridSearchTool), + McpToolEnum::SearchQuality(DevkitSearchQualityTool), + McpToolEnum::SkillList(DevkitSkillListTool), + McpToolEnum::SkillSearch(DevkitSkillSearchTool), + McpToolEnum::SkillRun(DevkitSkillRunTool), + McpToolEnum::SkillDiscover(DevkitSkillDiscoverTool), + McpToolEnum::SkillSync(DevkitSkillSyncTool), + McpToolEnum::KnownLimitStore(DevkitKnownLimitStoreTool), + McpToolEnum::KnownLimitList(DevkitKnownLimitListTool), + McpToolEnum::RelationStore(DevkitRelationStoreTool), + McpToolEnum::RelationQuery(DevkitRelationQueryTool), + McpToolEnum::RelationDelete(DevkitRelationDeleteTool), + McpToolEnum::SessionSave(DevkitSessionSaveTool), + McpToolEnum::SessionList(DevkitSessionListTool), + McpToolEnum::SessionResume(DevkitSessionResumeTool), + McpToolEnum::SessionAttach(DevkitSessionAttachTool), + McpToolEnum::SessionDetach(DevkitSessionDetachTool), + McpToolEnum::SessionActivate(DevkitSessionActivateTool), + McpToolEnum::SessionSearch(DevkitSessionSearchTool), + McpToolEnum::SessionCapture(DevkitSessionCaptureTool), + McpToolEnum::SessionWorkflows(DevkitSessionWorkflowsTool), + McpToolEnum::SessionRecall(DevkitSessionRecallTool), + McpToolEnum::SessionIndex(DevkitSessionIndexTool), + McpToolEnum::SessionExport(DevkitSessionExportTool), + McpToolEnum::SessionImport(DevkitSessionImportTool), + McpToolEnum::WorkflowList(DevkitWorkflowListTool), + McpToolEnum::WorkflowRun(DevkitWorkflowRunTool), + McpToolEnum::WorkflowStatus(DevkitWorkflowStatusTool), + McpToolEnum::OplogQuery(DevkitOplogQueryTool), + McpToolEnum::Evaluate(DevkitEvaluateTool), + McpToolEnum::DocumentConvert(DevkitDocumentConvertTool), + 
McpToolEnum::OntologyImport(DevkitOntologyImportTool), + ]; + for tool in all_tools { + if let Some(allowed) = tiers + && !allowed.contains(&tool.tier()) + { + continue; + } + server.register_tool(tool); + } + server +} + +/// Build an MCP server with all tools (backward compatible). +pub fn build_server() -> McpServer { + build_server_with_tiers(None) +} + +pub fn format_mcp_message(body: &serde_json::Value) -> String { + format_mcp_message_auto(body, false) +} + +/// Format MCP message with optional NDJSON mode (no Content-Length headers). +/// NDJSON mode outputs raw JSON followed by a newline, for clients that +/// expect line-delimited JSON-RPC over stdio. +pub fn format_mcp_message_auto(body: &serde_json::Value, ndjson: bool) -> String { + let body_str = body.to_string(); + if ndjson { + format!("{}\n", body_str) + } else { + format!("Content-Length: {}\r\n\r\n{}", body_str.len(), body_str) + } +} + +/// Check whether destructive MCP tools are enabled via environment variable. +/// Returns Ok(()) if enabled, or an error with a clear message if disabled. +pub(crate) fn check_destructive_enabled() -> anyhow::Result<()> { + let enabled = std::env::var("DEVBASE_MCP_ENABLE_DESTRUCTIVE") + .map(|v| v == "1" || v.eq_ignore_ascii_case("true")) + .unwrap_or(false); + if !enabled { + anyhow::bail!( + "Destructive tools are disabled. \ + Set DEVBASE_MCP_ENABLE_DESTRUCTIVE=1 to enable." + ); + } + Ok(()) +} + +/// Parse tool tiers from a comma-separated string (e.g. "stable,beta"). 
+fn parse_tool_tiers(s: &str) -> HashSet { + s.split(',') + .map(|t| t.trim()) + .filter(|t| !t.is_empty()) + .filter_map(|s| s.parse().ok()) + .collect() +} + +pub async fn run_stdio() -> anyhow::Result<()> { + let mut ctx = crate::storage::AppContext::with_defaults()?; + let tiers: Option> = std::env::var("DEVBASE_MCP_TOOL_TIERS") + .ok() + .map(|s| parse_tool_tiers(&s)) + .filter(|set| !set.is_empty()); + let server = build_server_with_tiers(tiers.as_ref()); + let stdin = tokio::io::stdin(); + let mut stdout = tokio::io::stdout(); + let mut reader = BufReader::new(stdin); + let mut line_buf = String::new(); + let mut use_ndjson = false; + + loop { + line_buf.clear(); + // Read header line to get Content-Length + let n = reader.read_line(&mut line_buf).await?; + if n == 0 { + break; // EOF + } + let line = line_buf.trim(); + if line.is_empty() { + continue; + } + + let content_length = if line.starts_with("Content-Length: ") { + line.strip_prefix("Content-Length: ").and_then(|v| v.parse::().ok()) + } else { + // Client is using NDJSON (raw JSON lines). Switch to NDJSON output. 
+ use_ndjson = true; + // Fallback: parse raw JSON line for backward compatibility + let req: serde_json::Value = match serde_json::from_str(line) { + Ok(v) => v, + Err(e) => { + let resp = serde_json::json!({ + "jsonrpc": "2.0", + "id": null, + "error": { + "code": -32700, + "message": format!("Parse error: {}", e) + } + }); + let msg = format_mcp_message_auto(&resp, use_ndjson); + if stdout.write_all(msg.as_bytes()).await.is_err() + || stdout.flush().await.is_err() + { + break; + } + continue; + } + }; + let resp = server.handle_request(req, &mut ctx).await.unwrap_or_else(|e| { + serde_json::json!({ + "jsonrpc": "2.0", + "id": null, + "error": { + "code": -32603, + "message": format!("Internal error: {}", e) + } + }) + }); + if !resp.is_null() { + let msg = format_mcp_message_auto(&resp, use_ndjson); + if stdout.write_all(msg.as_bytes()).await.is_err() || stdout.flush().await.is_err() + { + break; + } + } + continue; + }; + + let content_length = match content_length { + Some(len) => len, + None => { + let resp = serde_json::json!({ + "jsonrpc": "2.0", + "id": null, + "error": { + "code": -32700, + "message": format!("Invalid Content-Length header: {}", line) + } + }); + let msg = format_mcp_message_auto(&resp, use_ndjson); + if stdout.write_all(msg.as_bytes()).await.is_err() || stdout.flush().await.is_err() + { + break; + } + continue; + } + }; + + // Read the empty line (\r\n or \n) + line_buf.clear(); + let _ = reader.read_line(&mut line_buf).await; + + // Read the exact number of bytes + let mut body_buf = vec![0u8; content_length]; + if let Err(e) = reader.read_exact(&mut body_buf).await { + let resp = serde_json::json!({ + "jsonrpc": "2.0", + "id": null, + "error": { + "code": -32700, + "message": format!("Failed to read request body: {}", e) + } + }); + let msg = format_mcp_message_auto(&resp, use_ndjson); + if stdout.write_all(msg.as_bytes()).await.is_err() || stdout.flush().await.is_err() { + break; + } + continue; + } + + let req: serde_json::Value = 
match String::from_utf8(body_buf) { + Ok(body) => match serde_json::from_str(&body) { + Ok(v) => v, + Err(e) => { + let resp = serde_json::json!({ + "jsonrpc": "2.0", + "id": null, + "error": { + "code": -32700, + "message": format!("Parse error: {}", e) + } + }); + let msg = format_mcp_message_auto(&resp, use_ndjson); + if stdout.write_all(msg.as_bytes()).await.is_err() + || stdout.flush().await.is_err() + { + break; // broken pipe + } + continue; + } + }, + Err(e) => { + let resp = serde_json::json!({ + "jsonrpc": "2.0", + "id": null, + "error": { + "code": -32700, + "message": format!("Invalid UTF-8: {}", e) + } + }); + let msg = format_mcp_message_auto(&resp, use_ndjson); + if stdout.write_all(msg.as_bytes()).await.is_err() || stdout.flush().await.is_err() + { + break; // broken pipe + } + continue; + } + }; + + // Notifications have no "id" field and require no response. + let is_notification = req.get("id").is_none(); + if is_notification { + // Silently acknowledge all notifications (not just notifications/*). 
+ continue; + } + + let resp = server.handle_request(req, &mut ctx).await.unwrap_or_else(|e| { + serde_json::json!({ + "jsonrpc": "2.0", + "id": null, + "error": { + "code": -32603, + "message": format!("Internal error: {}", e) + } + }) + }); + + if !resp.is_null() { + let msg = format_mcp_message_auto(&resp, use_ndjson); + if stdout.write_all(msg.as_bytes()).await.is_err() || stdout.flush().await.is_err() { + break; // broken pipe + } + } + } + + Ok(()) +} + +pub use crate::clients::*; +#[cfg(test)] +pub mod tests; +pub mod tools; diff --git a/src/mcp/tests.rs b/src/mcp/tests.rs index f413240..defd275 100644 --- a/src/mcp/tests.rs +++ b/src/mcp/tests.rs @@ -1,982 +1,982 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2026 juice094 -use super::*; -use crate::storage::StorageBackend; - -fn test_ctx() -> (crate::storage::AppContext, tempfile::TempDir) { - let backend = std::sync::Arc::new(crate::storage::TempStorageBackend::new()); - let ctx = crate::storage::AppContext::with_storage(backend).unwrap(); - // Return a dummy TempDir to keep call-site destructuring compatible. - let tmp = tempfile::tempdir().unwrap(); - (ctx, tmp) -} - -/// Lightweight helper: seed a single repo into the entities table. 
-fn seed_repo(ctx: &crate::storage::AppContext, id: &str, lang: &str) { - let conn = ctx.conn().unwrap(); - let now = chrono::Utc::now().to_rfc3339(); - conn.execute( - "INSERT INTO entities (id, entity_type, name, local_path, metadata, created_at, updated_at, language, discovered_at, workspace_type, data_tier, stars) - VALUES (?1, 'repo', ?2, ?3, ?4, ?5, ?5, ?6, ?5, 'git', 'private', 0)", - rusqlite::params![id, id, format!("/tmp/{}", id), "{}", &now, lang], - ).unwrap(); -} - -#[tokio::test] -async fn test_initialize() { - let server = build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 1, - "method": "initialize" - }); - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - assert_eq!(resp.get("jsonrpc").unwrap(), "2.0"); - let result = resp.get("result").unwrap(); - assert_eq!(result.get("protocolVersion").unwrap(), "2024-11-05"); - assert_eq!(result.get("serverInfo").unwrap().get("name").unwrap(), "devbase"); - assert!(result.get("capabilities").unwrap().get("tools").is_some()); -} - -#[tokio::test] -async fn test_tools_list() { - let server = build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 2, - "method": "tools/list" - }); - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - let tools = resp.get("result").unwrap().get("tools").unwrap().as_array().unwrap(); - assert_eq!(tools.len(), 70); - let names: Vec<&str> = tools.iter().map(|t| t.get("name").unwrap().as_str().unwrap()).collect(); - assert!(names.contains(&"devkit_index_health")); - assert!(names.contains(&"devkit_vault_export")); - assert!(names.contains(&"devkit_vault_history")); - assert!(names.contains(&"devkit_search_quality")); - assert!(names.contains(&"devkit_session_save")); - assert!(names.contains(&"devkit_session_list")); - assert!(names.contains(&"devkit_session_resume")); - assert!(names.contains(&"devkit_session_recall")); - 
assert!(names.contains(&"devkit_session_index")); - assert!(names.contains(&"devkit_session_export")); - assert!(names.contains(&"devkit_session_import")); - assert!(names.contains(&"devkit_evaluate")); - assert!(names.contains(&"devkit_document_convert")); - assert!(names.contains(&"devkit_scan")); - assert!(names.contains(&"devkit_health")); - assert!(names.contains(&"devkit_sync")); - assert!(names.contains(&"devkit_query")); - assert!(names.contains(&"devkit_query_repos")); - assert!(names.contains(&"devkit_index")); - assert!(names.contains(&"devkit_index_stream")); - assert!(names.contains(&"devkit_status")); - assert!(names.contains(&"devkit_note")); - assert!(names.contains(&"devkit_digest")); - assert!(names.contains(&"devkit_paper_index")); - assert!(names.contains(&"devkit_experiment_log")); - assert!(names.contains(&"devkit_github_info")); - assert!(names.contains(&"devkit_code_metrics")); - assert!(names.contains(&"devkit_module_graph")); - assert!(names.contains(&"devkit_code_symbols")); - assert!(names.contains(&"devkit_dependency_graph")); - assert!(names.contains(&"devkit_call_graph")); - assert!(names.contains(&"devkit_dead_code")); - assert!(names.contains(&"devkit_semantic_search")); - assert!(names.contains(&"devkit_embedding_store")); - assert!(names.contains(&"devkit_embedding_search")); - assert!(names.contains(&"devkit_natural_language_query")); - assert!(names.contains(&"devkit_vault_search")); - assert!(names.contains(&"devkit_vault_read")); - assert!(names.contains(&"devkit_vault_write")); - assert!(names.contains(&"devkit_vault_backlinks")); - assert!(names.contains(&"devkit_vault_daily")); - assert!(names.contains(&"devkit_vault_graph")); - assert!(names.contains(&"devkit_project_context")); - assert!(names.contains(&"devkit_project_brief")); - assert!(names.contains(&"devkit_impact_analysis")); - assert!(names.contains(&"devkit_cross_repo_search")); - assert!(names.contains(&"devkit_knowledge_report")); - 
assert!(names.contains(&"devkit_related_symbols")); - assert!(names.contains(&"devkit_hybrid_search")); - assert!(names.contains(&"devkit_skill_list")); - assert!(names.contains(&"devkit_skill_search")); - assert!(names.contains(&"devkit_skill_run")); - assert!(names.contains(&"devkit_skill_discover")); - assert!(names.contains(&"devkit_skill_sync")); - assert!(names.contains(&"devkit_known_limit_store")); - assert!(names.contains(&"devkit_known_limit_list")); - assert!(names.contains(&"devkit_relation_store")); - assert!(names.contains(&"devkit_relation_query")); - assert!(names.contains(&"devkit_relation_delete")); - assert!(names.contains(&"devkit_workflow_list")); - assert!(names.contains(&"devkit_workflow_run")); - assert!(names.contains(&"devkit_workflow_status")); - for tool in tools { - assert!(tool.get("name").is_some()); - assert!(tool.get("description").is_some()); - assert!(tool.get("inputSchema").is_some()); - } -} - -#[tokio::test] -async fn test_tools_call_devkit_health() { - let server = build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 3, - "method": "tools/call", - "params": { - "name": "devkit_health", - "arguments": { "detail": false } - } - }); - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - let result = resp.get("result").unwrap(); - let content = result.get("content").unwrap().as_array().unwrap(); - let text = content[0].get("text").unwrap().as_str().unwrap(); - let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); - if parsed.get("success").unwrap() != &serde_json::Value::Bool(true) { - eprintln!( - "devkit_health returned error: {}", - serde_json::to_string_pretty(&parsed).unwrap() - ); - } - assert_eq!(parsed.get("success").unwrap(), true); - let summary = parsed.get("summary").unwrap(); - assert!(summary.get("total_repos").unwrap().as_i64().unwrap() >= 0); -} - -#[tokio::test] -async fn test_tools_call_devkit_query() { - let server = 
build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 4, - "method": "tools/call", - "params": { - "name": "devkit_query", - "arguments": { "expression": "lang:rust" } - } - }); - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - let result = resp.get("result").unwrap(); - let content = result.get("content").unwrap().as_array().unwrap(); - let text = content[0].get("text").unwrap().as_str().unwrap(); - let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); - assert_eq!(parsed.get("success").unwrap(), true); - assert!(parsed.get("count").unwrap().as_i64().unwrap() >= 0); -} - -#[tokio::test] -async fn test_tools_call_unknown_tool() { - let server = build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 5, - "method": "tools/call", - "params": { - "name": "unknown_tool", - "arguments": {} - } - }); - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - assert!(resp.get("error").is_some()); - let error = resp.get("error").unwrap(); - assert_eq!(error.get("code").unwrap().as_i64().unwrap(), -32602); -} - -#[tokio::test] -async fn test_unknown_method() { - let server = build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 6, - "method": "unknown/method" - }); - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - assert!(resp.get("error").is_some()); - let error = resp.get("error").unwrap(); - assert_eq!(error.get("code").unwrap().as_i64().unwrap(), -32601); -} - -#[tokio::test] -async fn test_tools_call_devkit_project_context() { - let server = build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 7, - "method": "tools/call", - "params": { - "name": "devkit_project_context", - "arguments": { "project": "nonexistent-project-xyz" } - } - }); - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut 
ctx).await.unwrap(); - let result = resp.get("result").unwrap(); - assert_eq!(result.get("content").unwrap().as_array().unwrap().len(), 1); - let text = result["content"][0]["text"].as_str().unwrap(); - let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); - assert_eq!(parsed.get("success").unwrap(), true); - assert!(parsed.get("repo").unwrap().is_null()); - assert!(parsed.get("vault_notes").unwrap().as_array().unwrap().is_empty()); - assert!(parsed.get("assets").unwrap().as_array().unwrap().is_empty()); -} - -#[tokio::test] -async fn test_tools_call_devkit_query_repos() { - let server = build_server(); - let (mut ctx, _tmp) = test_ctx(); - - // 1. Empty registry returns empty results - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 10, - "method": "tools/call", - "params": { - "name": "devkit_query_repos", - "arguments": { "language": "" } - } - }); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - let result = resp.get("result").unwrap(); - let content = result.get("content").unwrap().as_array().unwrap(); - let text = content[0].get("text").unwrap().as_str().unwrap(); - let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); - assert_eq!(parsed.get("success").unwrap(), true); - assert_eq!(parsed.get("count").unwrap().as_i64().unwrap(), 0); - - // 2. 
Seeded repo is returned with correct filtering - seed_repo(&ctx, "test-repo", "rust"); - let req2 = serde_json::json!({ - "jsonrpc": "2.0", - "id": 11, - "method": "tools/call", - "params": { - "name": "devkit_query_repos", - "arguments": { "language": "rust" } - } - }); - let resp2 = server.handle_request(req2, &mut ctx).await.unwrap(); - let result2 = resp2.get("result").unwrap(); - let content2 = result2.get("content").unwrap().as_array().unwrap(); - let text2 = content2[0].get("text").unwrap().as_str().unwrap(); - let parsed2: serde_json::Value = serde_json::from_str(text2).unwrap(); - assert_eq!(parsed2.get("success").unwrap(), true); - let repos = parsed2.get("repos").unwrap().as_array().unwrap(); - assert_eq!(repos.len(), 1); - assert_eq!(repos[0].get("id").unwrap().as_str().unwrap(), "test-repo"); - assert_eq!(repos[0].get("language").unwrap().as_str().unwrap(), "rust"); -} - -#[tokio::test] -async fn test_tools_call_devkit_vault_search() { - let server = build_server(); - let (mut ctx, _tmp) = test_ctx(); - - // Setup: create vault note and scan - let ws = ctx.storage.workspace_dir().unwrap(); - let vault_dir = ws.join("vault"); - std::fs::create_dir_all(&vault_dir).unwrap(); - std::fs::write( - vault_dir.join("test-note.md"), - "---\ntitle: Test Note\ntags: [test, vault]\n---\n\nThis is a test note for vault search.\n", - ).unwrap(); - let mut conn = ctx.conn().unwrap(); - crate::vault::scanner::scan_vault(&mut conn, Some(&vault_dir)).unwrap(); - - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 12, - "method": "tools/call", - "params": { - "name": "devkit_vault_search", - "arguments": { "query": "test note" } - } - }); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - let result = resp.get("result").unwrap(); - let content = result.get("content").unwrap().as_array().unwrap(); - let text = content[0].get("text").unwrap().as_str().unwrap(); - let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); - 
assert_eq!(parsed.get("success").unwrap(), true); - let notes = parsed.get("notes").unwrap().as_array().unwrap(); - assert!(!notes.is_empty(), "vault_search should find the test-note"); - assert!( - notes - .iter() - .any(|n| n.get("title").and_then(|v| v.as_str()) == Some("Test Note")), - "vault_search should return Test Note" - ); -} - -#[tokio::test] -async fn test_tools_call_devkit_vault_read() { - let server = build_server(); - let (mut ctx, _tmp) = test_ctx(); - - // Setup: create vault note and scan - let ws = ctx.storage.workspace_dir().unwrap(); - let vault_dir = ws.join("vault"); - std::fs::create_dir_all(&vault_dir).unwrap(); - let note_path = vault_dir.join("test-read.md"); - std::fs::write( - &note_path, - "---\ntitle: Readable Note\ntags: [read]\n---\n\nContent body here.\n", - ) - .unwrap(); - let mut conn = ctx.conn().unwrap(); - crate::vault::scanner::scan_vault(&mut conn, Some(&vault_dir)).unwrap(); - - // 1. Read existing note by absolute path - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 13, - "method": "tools/call", - "params": { - "name": "devkit_vault_read", - "arguments": { "path": note_path.to_str().unwrap() } - } - }); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - let result = resp.get("result").unwrap(); - let content = result.get("content").unwrap().as_array().unwrap(); - let text = content[0].get("text").unwrap().as_str().unwrap(); - let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); - assert_eq!(parsed.get("success").unwrap(), true); - assert_eq!(parsed.get("path").unwrap().as_str().unwrap(), note_path.to_str().unwrap()); - let frontmatter = parsed.get("frontmatter").unwrap().as_str().unwrap(); - assert!(frontmatter.contains("title: Readable Note")); - let body = parsed.get("content").unwrap().as_str().unwrap(); - assert!(body.contains("Content body here.")); - - // 2. 
Read non-existent note returns error - let req2 = serde_json::json!({ - "jsonrpc": "2.0", - "id": 14, - "method": "tools/call", - "params": { - "name": "devkit_vault_read", - "arguments": { "path": "/nonexistent/path/note.md" } - } - }); - let resp2 = server.handle_request(req2, &mut ctx).await.unwrap(); - let result2 = resp2.get("result").unwrap(); - assert_eq!(result2.get("isError").unwrap(), true); - let content2 = result2.get("content").unwrap().as_array().unwrap(); - let text2 = content2[0].get("text").unwrap().as_str().unwrap(); - let parsed2: serde_json::Value = serde_json::from_str(text2).unwrap(); - assert_eq!(parsed2.get("success").unwrap(), false); - assert!( - parsed2.get("error").unwrap().as_str().unwrap().contains("not found") - || parsed2.get("error").unwrap().as_str().unwrap().contains("unreadable") - ); -} - -#[tokio::test] -async fn test_tools_call_devkit_arxiv_fetch() { - let server = build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 8, - "method": "tools/call", - "params": { - "name": "devkit_arxiv_fetch", - "arguments": { "arxiv_id": "" } - } - }); - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - let result = resp.get("result").unwrap(); - let content = result.get("content").unwrap().as_array().unwrap(); - let text = content[0].get("text").unwrap().as_str().unwrap(); - let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); - // Empty arxiv_id should result in an error from the arXiv API or parser - assert_eq!(parsed.get("success").unwrap(), false); - assert!(!parsed.get("error").unwrap().as_str().unwrap().is_empty()); -} - -#[tokio::test] -async fn test_tools_call_devkit_status() { - let server = build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 15, - "method": "tools/call", - "params": { - "name": "devkit_status", - "arguments": {} - } - }); - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut 
ctx).await.unwrap(); - let result = resp.get("result").unwrap(); - let content = result.get("content").unwrap().as_array().unwrap(); - let text = content[0].get("text").unwrap().as_str().unwrap(); - let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); - assert_eq!(parsed.get("success").unwrap(), true); - // Empty registry → overall "fresh" (vacuous truth: all 0 repos are fresh) - assert_eq!(parsed.get("overall").unwrap().as_str().unwrap(), "fresh"); - let repos = parsed.get("repos").unwrap().as_array().unwrap(); - assert!(repos.is_empty()); -} - -#[tokio::test] -async fn test_tools_call_devkit_workflow_list() { - let server = build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 16, - "method": "tools/call", - "params": { - "name": "devkit_workflow_list", - "arguments": {} - } - }); - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - let result = resp.get("result").unwrap(); - let content = result.get("content").unwrap().as_array().unwrap(); - let text = content[0].get("text").unwrap().as_str().unwrap(); - let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); - assert_eq!(parsed.get("success").unwrap(), true); - assert_eq!(parsed.get("count").unwrap().as_i64().unwrap(), 0); - let workflows = parsed.get("workflows").unwrap().as_array().unwrap(); - assert!(workflows.is_empty()); -} - -#[tokio::test] -async fn test_tools_call_devkit_index() { - let server = build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 17, - "method": "tools/call", - "params": { - "name": "devkit_index", - "arguments": { "path": "" } - } - }); - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - let result = resp.get("result").unwrap(); - let content = result.get("content").unwrap().as_array().unwrap(); - let text = content[0].get("text").unwrap().as_str().unwrap(); - let parsed: serde_json::Value = 
serde_json::from_str(text).unwrap(); - assert_eq!(parsed.get("success").unwrap(), true); - // Empty registry → indexed 0 repos - assert_eq!(parsed.get("indexed").unwrap().as_i64().unwrap(), 0); -} - -#[tokio::test] -async fn test_tools_call_devkit_skill_list() { - let server = build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 9, - "method": "tools/call", - "params": { - "name": "devkit_skill_list", - "arguments": {} - } - }); - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - let result = resp.get("result").unwrap(); - let content = result.get("content").unwrap().as_array().unwrap(); - let text = content[0].get("text").unwrap().as_str().unwrap(); - let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); - assert_eq!(parsed.get("success").unwrap(), true); - assert!(parsed.get("skills").unwrap().is_array()); - assert!(parsed.get("count").unwrap().as_i64().unwrap() >= 0); -} - -#[tokio::test] -async fn test_tools_call_devkit_skill_search() { - let server = build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 10, - "method": "tools/call", - "params": { - "name": "devkit_skill_search", - "arguments": { "query": "report" } - } - }); - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - let result = resp.get("result").unwrap(); - let content = result.get("content").unwrap().as_array().unwrap(); - let text = content[0].get("text").unwrap().as_str().unwrap(); - let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); - assert_eq!(parsed.get("success").unwrap(), true); - assert!(parsed.get("skills").unwrap().is_array()); - assert!(parsed.get("count").unwrap().as_i64().unwrap() >= 0); -} - -#[tokio::test] -async fn test_tools_call_devkit_skill_discover() { - let server = build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 11, - "method": "tools/call", - "params": { - "name": 
"devkit_skill_discover", - "arguments": { - "path": ".", - "skill_id": "mcp-test-discover", - "dry_run": true - } - } - }); - // SAFETY: test-only env var mutation; test runner guarantees no concurrent - // reads of DEVBASE_MCP_ENABLE_DESTRUCTIVE in this process. - unsafe { - std::env::set_var("DEVBASE_MCP_ENABLE_DESTRUCTIVE", "1"); - } - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - let result = resp.get("result").unwrap(); - let content = result.get("content").unwrap().as_array().unwrap(); - let text = content[0].get("text").unwrap().as_str().unwrap(); - let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); - assert_eq!(parsed.get("success").unwrap(), true); - assert!(!parsed.get("id").unwrap().as_str().unwrap().is_empty()); - assert!(!parsed.get("name").unwrap().as_str().unwrap().is_empty()); - assert!(parsed.get("version").unwrap().as_str().is_some()); - assert!(parsed.get("category").is_some()); -} - -#[test] -fn test_destructive_gate_disabled_by_default() { - // Ensure the variable is unset - // SAFETY: test-only env var mutation; no concurrent reads of this var. - unsafe { - std::env::remove_var("DEVBASE_MCP_ENABLE_DESTRUCTIVE"); - } - let result = crate::mcp::check_destructive_enabled(); - assert!(result.is_err()); - let msg = result.unwrap_err().to_string(); - assert!(msg.contains("DEVBASE_MCP_ENABLE_DESTRUCTIVE")); -} - -#[test] -fn test_destructive_gate_enabled() { - // SAFETY: test-only env var mutation; no concurrent reads of this var. - unsafe { - std::env::set_var("DEVBASE_MCP_ENABLE_DESTRUCTIVE", "1"); - } - let result = crate::mcp::check_destructive_enabled(); - assert!(result.is_ok()); - // Cleanup - // SAFETY: test-only env var mutation; no concurrent reads of this var. 
- unsafe { - std::env::remove_var("DEVBASE_MCP_ENABLE_DESTRUCTIVE"); - } -} - -#[tokio::test] -#[ignore = "requires knowledge-report skill installed and may run external Python process"] -async fn test_tools_call_devkit_skill_run() { - let server = build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 11, - "method": "tools/call", - "params": { - "name": "devkit_skill_run", - "arguments": { - "skill_id": "knowledge-report", - "args": { "repo_id": "devbase" } - } - } - }); - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - let result = resp.get("result").unwrap(); - let content = result.get("content").unwrap().as_array().unwrap(); - let text = content[0].get("text").unwrap().as_str().unwrap(); - let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); - assert_eq!(parsed.get("success").unwrap(), true); - assert!(parsed.get("status").is_some()); - assert!(parsed.get("stdout").is_some()); -} - -#[tokio::test] -async fn test_stdio_content_length_format() { - let body = serde_json::json!({ "jsonrpc": "2.0", "id": 1, "result": {} }); - let msg = format_mcp_message(&body); - assert!(msg.starts_with("Content-Length: ")); - let parts: Vec<&str> = msg.split("\r\n\r\n").collect(); - assert_eq!(parts.len(), 2); - let body_part = parts[1]; - // No trailing newline — Content-Length must match exact body bytes - assert!(!body_part.ends_with("\n")); - let parsed: serde_json::Value = serde_json::from_str(body_part).unwrap(); - assert_eq!(parsed, body); - // Verify Content-Length header matches actual body byte count - let header = parts[0]; - let cl_str = header.strip_prefix("Content-Length: ").unwrap(); - let cl: usize = cl_str.parse().unwrap(); - assert_eq!(cl, body_part.len()); -} - -static NL_FILTER_TEST_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); - -fn mock_repo( - id: &str, - language: Option<&str>, - tags: Vec<&str>, - stars: Option, -) -> crate::registry::RepoEntry { - 
crate::registry::RepoEntry { - id: id.to_string(), - local_path: std::path::PathBuf::from(format!("/tmp/{}", id)), - tags: tags.into_iter().map(String::from).collect(), - discovered_at: chrono::Utc::now(), - language: language.map(String::from), - workspace_type: "git".to_string(), - data_tier: "private".to_string(), - last_synced_at: None, - stars, - remotes: vec![], - } -} - -#[test] -fn test_nl_filter_repos_empty_query_returns_empty() -> anyhow::Result<()> { - let _guard = NL_FILTER_TEST_LOCK.lock().unwrap(); - let conn = crate::registry::WorkspaceRegistry::init_in_memory()?; - let repos: Vec<crate::registry::RepoEntry> = vec![]; - let backend = crate::storage::TempStorageBackend::new(); - let index_path = backend.index_path()?; - let searcher = crate::search::SearchClientImpl; - let analyzer = crate::health::RepoAnalyzerImpl; - let results = crate::mcp::tools::repo::nl_filter_repos_at( - &index_path, - "", - &repos, - &conn, - &searcher, - &analyzer, - )?; - assert!(results.is_empty()); - Ok(()) -} - -#[test] -fn test_nl_filter_repos_fallback_finds_by_language() -> anyhow::Result<()> { - let _guard = NL_FILTER_TEST_LOCK.lock().unwrap(); - let conn = crate::registry::WorkspaceRegistry::init_in_memory()?; - let repos = vec![ - mock_repo("repo1", Some("rust"), vec!["cli"], Some(10)), - mock_repo("repo2", Some("python"), vec!["web"], Some(5)), - ]; - let backend = crate::storage::TempStorageBackend::new(); - let index_path = backend.index_path()?; - let searcher = crate::search::SearchClientImpl; - let analyzer = crate::health::RepoAnalyzerImpl; - let results = crate::mcp::tools::repo::nl_filter_repos_at( - &index_path, - "rust cli tool", - &repos, - &conn, - &searcher, - &analyzer, - )?; - assert_eq!(results.len(), 1); - assert_eq!(results[0].id, "repo1"); - Ok(()) -} - -#[test] -fn test_nl_filter_repos_tantivy_finds_devbase() -> anyhow::Result<()> { - let backend = std::sync::Arc::new(crate::storage::TempStorageBackend::new()); - let index_path = backend.index_path()?; - - // Ensure DB 
schema exists - let conn = crate::registry::WorkspaceRegistry::init_db_with(&*backend)?; - - // Populate Tantivy index with devbase doc - let (index, _reader) = crate::search::init_index_at(&index_path)?; - let mut writer = crate::search::get_writer(&index)?; - let schema = index.schema(); - crate::search::add_repo_doc( - &mut writer, - &schema, - "devbase", - "devbase developer workspace manager", - "rust, cli, workspace, developer", - &["rust".to_string(), "cli".to_string()], - )?; - crate::search::commit_writer(&mut writer)?; - - let repos = vec![crate::registry::RepoEntry { - id: "devbase".to_string(), - local_path: std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")), - tags: vec!["rust".to_string(), "cli".to_string()], - discovered_at: chrono::Utc::now(), - language: Some("rust".to_string()), - workspace_type: "git".to_string(), - data_tier: "private".to_string(), - last_synced_at: None, - stars: Some(10), - remotes: vec![], - }]; - - let searcher = crate::search::SearchClientImpl; - let analyzer = crate::health::RepoAnalyzerImpl; - let results = crate::mcp::tools::repo::nl_filter_repos_at( - &index_path, - "developer workspace", - &repos, - &conn, - &searcher, - &analyzer, - )?; - assert!(!results.is_empty(), "tantivy path should find devbase"); - assert_eq!(results[0].id, "devbase"); - Ok(()) -} - -#[test] -fn test_format_mcp_message() { - let body = serde_json::json!({"jsonrpc": "2.0", "id": 1}); - let msg = format_mcp_message(&body); - assert!(msg.starts_with("Content-Length:")); - assert!(msg.contains("\r\n\r\n")); - // No trailing newline — spec-compliant MCP message ends after JSON body - assert!(!msg.ends_with("\n")); -} - -#[test] -fn test_parse_tool_tiers() { - let tiers = parse_tool_tiers("stable,beta"); - assert!(tiers.contains(&ToolTier::Stable)); - assert!(tiers.contains(&ToolTier::Beta)); - assert!(!tiers.contains(&ToolTier::Experimental)); -} - -#[test] -fn test_parse_tool_tiers_empty() { - let tiers = parse_tool_tiers(""); - 
assert!(tiers.is_empty()); -} - -// --- Claude Scenario Validation Tests --- - -fn seed_scenario_data(ctx: &crate::storage::AppContext) { - let mut conn = ctx.conn().unwrap(); - let now = chrono::Utc::now().to_rfc3339(); - - // 1. Register a repo in the entities table (single source of truth) - conn.execute( - "INSERT INTO entities (id, entity_type, name, local_path, metadata, created_at, updated_at, language, discovered_at, workspace_type, data_tier, stars) - VALUES (?1, 'repo', ?2, ?3, ?4, ?5, ?5, ?6, ?5, ?7, ?8, ?9)", - rusqlite::params!["scenario-repo", "scenario-repo", "/tmp/scenario-repo", "{}", &now, "rust", "git", "private", 42i64], - ).unwrap(); - - // 2. Tags (including "managed" for is_managed coverage) - for tag in &["rust", "cli", "managed"] { - conn.execute( - "INSERT INTO repo_tags (repo_id, tag) VALUES (?1, ?2)", - rusqlite::params!["scenario-repo", tag], - ) - .unwrap(); - } - - // 3. Code symbols: 10 entries, mix of functions and structs. - // Include auth-related signatures so "authentication flow" keyword search hits. 
- let symbols: [(&str, &str, &str, i64, Option<&str>); 10] = [ - ( - "src/auth.rs", - "function", - "authenticate_user", - 10, - Some("pub fn authenticate_user(token: &str) // authentication flow handler"), - ), - ( - "src/auth.rs", - "function", - "validate_token", - 20, - Some("fn validate_token(t: &str) -> bool"), - ), - ( - "src/lib.rs", - "function", - "handle_error", - 30, - Some("pub fn handle_error(e: Error)"), - ), - ( - "src/lib.rs", - "function", - "parse_config", - 40, - Some("fn parse_config() -> Config"), - ), - ("src/main.rs", "function", "main", 1, Some("fn main()")), - ("src/lib.rs", "struct", "Config", 5, None), - ("src/models.rs", "struct", "User", 10, None), - ("src/models.rs", "function", "new_user", 15, Some("fn new_user() -> User")), - ("src/db.rs", "function", "connect_pool", 5, Some("fn connect_pool() -> Pool")), - ("src/api.rs", "function", "serve", 1, Some("pub async fn serve(addr: &str)")), - ]; - for (path, ty, name, line, sig) in &symbols { - conn.execute( - "INSERT INTO code_symbols (repo_id, file_path, symbol_type, name, line_start, signature) - VALUES (?1, ?2, ?3, ?4, ?5, ?6)", - rusqlite::params!["scenario-repo", path, ty, name, line, *sig], - ).unwrap(); - } - - // 4. 
Vault notes: create filesystem files then scan into registry - let ws = ctx.storage.workspace_dir().unwrap(); - let vault_dir = ws.join("vault"); - std::fs::create_dir_all(&vault_dir).unwrap(); - std::fs::write( - vault_dir.join("auth-design.md"), - "---\ntitle: Authentication Flow Design\nrepo: scenario-repo\ntags: [auth, design]\n---\n\nThis document describes the authentication flow for the scenario repo.\nThe authenticate_user function handles token validation.\n", - ).unwrap(); - crate::vault::scanner::scan_vault(&mut conn, Some(&vault_dir)).unwrap(); -} - -#[tokio::test] -async fn test_scenario_one_project_onboarding() { - let backend = std::sync::Arc::new(crate::storage::TempStorageBackend::new()); - let mut ctx = crate::storage::AppContext::with_storage(backend).unwrap(); - seed_scenario_data(&ctx); - - // Tool 1: devkit_health - let health_tool = DevkitHealthTool; - let health_result = health_tool - .invoke(serde_json::json!({ "detail": true }), &mut ctx) - .await - .unwrap(); - assert_eq!(health_result.get("success").unwrap(), true); - let summary = health_result.get("summary").unwrap(); - assert!(summary.get("total_repos").unwrap().as_i64().unwrap() >= 1); - - // Tool 2: devkit_project_brief - let brief_tool = DevkitProjectBriefTool; - let brief_result = brief_tool - .invoke(serde_json::json!({ "repo_id": "scenario-repo" }), &mut ctx) - .await - .unwrap(); - assert_eq!(brief_result.get("success").unwrap(), true); - let brief = brief_result.get("brief").unwrap().as_str().unwrap(); - // Acceptance: brief contains >= 5 key modules/symbols - let symbol_count = brief.matches("- `").count(); - assert!( - symbol_count >= 5, - "Expected >= 5 symbols in brief, found {}. 
Brief:\n{}", - symbol_count, - brief - ); - assert!(brief.contains("## Architecture")); - assert!(brief.contains("Key Symbols:")); - - // Tool 3: devkit_query_repos - let query_tool = DevkitQueryReposTool; - let query_result = query_tool.invoke(serde_json::json!({}), &mut ctx).await.unwrap(); - assert_eq!(query_result.get("success").unwrap(), true); - let repos = query_result.get("repos").unwrap().as_array().unwrap(); - assert!( - repos - .iter() - .any(|r| r.get("id").and_then(|v| v.as_str()) == Some("scenario-repo")), - "scenario-repo should be listed in query_repos" - ); -} - -#[tokio::test] -async fn test_tools_call_devkit_document_convert_not_found() { - let server = build_server(); - let req = serde_json::json!({ - "jsonrpc": "2.0", - "id": 18, - "method": "tools/call", - "params": { - "name": "devkit_document_convert", - "arguments": { "source_path": "/nonexistent/file.pdf" } - } - }); - let (mut ctx, _tmp) = test_ctx(); - let resp = server.handle_request(req, &mut ctx).await.unwrap(); - let result = resp.get("result").unwrap(); - let content = result.get("content").unwrap().as_array().unwrap(); - let text = content[0].get("text").unwrap().as_str().unwrap(); - let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); - assert_eq!(parsed.get("success").unwrap(), false); - let err = parsed.get("error").unwrap().as_str().unwrap(); - assert!(err.contains("not found") || err.contains("Source file")); -} - -#[tokio::test] -async fn test_scenario_two_semantic_exploration() { - let backend = std::sync::Arc::new(crate::storage::TempStorageBackend::new()); - let mut ctx = crate::storage::AppContext::with_storage(backend).unwrap(); - seed_scenario_data(&ctx); - - // Tool 1: devkit_hybrid_search — keyword fallback path (no embeddings seeded) - let search_tool = DevkitHybridSearchTool; - let search_result = search_tool - .invoke( - serde_json::json!({ "repo_id": "scenario-repo", "query_text": "authentication flow", "limit": 10 }), - &mut ctx, - ) - .await - 
.unwrap(); - assert_eq!(search_result.get("success").unwrap(), true); - let symbols = search_result.get("symbols").unwrap().as_array().unwrap(); - assert!( - !symbols.is_empty(), - "hybrid_search should return at least 1 auth-related symbol via keyword fallback" - ); - let names: Vec<&str> = - symbols.iter().filter_map(|s| s.get("name").and_then(|v| v.as_str())).collect(); - assert!( - names.contains(&"authenticate_user"), - "authenticate_user should appear in hybrid_search results for 'authentication flow'. Got: {:?}", - names - ); - - // Tool 2: devkit_project_context - let context_tool = DevkitProjectContextTool; - let ctx_result = context_tool - .invoke(serde_json::json!({ "project": "scenario-repo" }), &mut ctx) - .await - .unwrap(); - assert_eq!(ctx_result.get("success").unwrap(), true); - let ctx_symbols = ctx_result.get("symbols").unwrap().as_array().unwrap(); - assert!( - ctx_symbols.len() >= 3, - "project_context should return >= 3 symbols for understanding. Got: {}", - ctx_symbols.len() - ); - - // Tool 3: devkit_vault_search - let vault_tool = DevkitVaultSearchTool; - let vault_result = vault_tool - .invoke(serde_json::json!({ "query": "authentication" }), &mut ctx) - .await - .unwrap(); - assert_eq!(vault_result.get("success").unwrap(), true); - let notes = vault_result.get("notes").unwrap().as_array().unwrap(); - assert!(!notes.is_empty(), "vault_search should find the auth-design note"); - assert!( - notes - .iter() - .any(|n| n.get("title").and_then(|v| v.as_str()) == Some("Authentication Flow Design")), - "vault_search should return auth-design note" - ); -} +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 +use super::*; +use crate::storage::StorageBackend; + +fn test_ctx() -> (crate::storage::AppContext, tempfile::TempDir) { + let backend = std::sync::Arc::new(crate::storage::TempStorageBackend::new()); + let ctx = crate::storage::AppContext::with_storage(backend).unwrap(); + // Return a dummy TempDir to keep call-site 
destructuring compatible. + let tmp = tempfile::tempdir().unwrap(); + (ctx, tmp) +} + +/// Lightweight helper: seed a single repo into the entities table. +fn seed_repo(ctx: &crate::storage::AppContext, id: &str, lang: &str) { + let conn = ctx.conn().unwrap(); + let now = chrono::Utc::now().to_rfc3339(); + conn.execute( + "INSERT INTO entities (id, entity_type, name, local_path, metadata, created_at, updated_at, language, discovered_at, workspace_type, data_tier, stars) + VALUES (?1, 'repo', ?2, ?3, ?4, ?5, ?5, ?6, ?5, 'git', 'private', 0)", + rusqlite::params![id, id, format!("/tmp/{}", id), "{}", &now, lang], + ).unwrap(); +} + +#[tokio::test] +async fn test_initialize() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 1, + "method": "initialize" + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + assert_eq!(resp.get("jsonrpc").unwrap(), "2.0"); + let result = resp.get("result").unwrap(); + assert_eq!(result.get("protocolVersion").unwrap(), "2024-11-05"); + assert_eq!(result.get("serverInfo").unwrap().get("name").unwrap(), "devbase"); + assert!(result.get("capabilities").unwrap().get("tools").is_some()); +} + +#[tokio::test] +async fn test_tools_list() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 2, + "method": "tools/list" + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let tools = resp.get("result").unwrap().get("tools").unwrap().as_array().unwrap(); + assert_eq!(tools.len(), 71); + let names: Vec<&str> = tools.iter().map(|t| t.get("name").unwrap().as_str().unwrap()).collect(); + assert!(names.contains(&"devkit_index_health")); + assert!(names.contains(&"devkit_vault_export")); + assert!(names.contains(&"devkit_vault_history")); + assert!(names.contains(&"devkit_search_quality")); + assert!(names.contains(&"devkit_session_save")); + 
assert!(names.contains(&"devkit_session_list")); + assert!(names.contains(&"devkit_session_resume")); + assert!(names.contains(&"devkit_session_recall")); + assert!(names.contains(&"devkit_session_index")); + assert!(names.contains(&"devkit_session_export")); + assert!(names.contains(&"devkit_session_import")); + assert!(names.contains(&"devkit_evaluate")); + assert!(names.contains(&"devkit_document_convert")); + assert!(names.contains(&"devkit_scan")); + assert!(names.contains(&"devkit_health")); + assert!(names.contains(&"devkit_sync")); + assert!(names.contains(&"devkit_query")); + assert!(names.contains(&"devkit_query_repos")); + assert!(names.contains(&"devkit_index")); + assert!(names.contains(&"devkit_index_stream")); + assert!(names.contains(&"devkit_status")); + assert!(names.contains(&"devkit_note")); + assert!(names.contains(&"devkit_digest")); + assert!(names.contains(&"devkit_paper_index")); + assert!(names.contains(&"devkit_experiment_log")); + assert!(names.contains(&"devkit_github_info")); + assert!(names.contains(&"devkit_code_metrics")); + assert!(names.contains(&"devkit_module_graph")); + assert!(names.contains(&"devkit_code_symbols")); + assert!(names.contains(&"devkit_dependency_graph")); + assert!(names.contains(&"devkit_call_graph")); + assert!(names.contains(&"devkit_dead_code")); + assert!(names.contains(&"devkit_semantic_search")); + assert!(names.contains(&"devkit_embedding_store")); + assert!(names.contains(&"devkit_embedding_search")); + assert!(names.contains(&"devkit_natural_language_query")); + assert!(names.contains(&"devkit_vault_search")); + assert!(names.contains(&"devkit_vault_read")); + assert!(names.contains(&"devkit_vault_write")); + assert!(names.contains(&"devkit_vault_backlinks")); + assert!(names.contains(&"devkit_vault_daily")); + assert!(names.contains(&"devkit_vault_graph")); + assert!(names.contains(&"devkit_project_context")); + assert!(names.contains(&"devkit_project_brief")); + 
assert!(names.contains(&"devkit_impact_analysis")); + assert!(names.contains(&"devkit_cross_repo_search")); + assert!(names.contains(&"devkit_knowledge_report")); + assert!(names.contains(&"devkit_related_symbols")); + assert!(names.contains(&"devkit_hybrid_search")); + assert!(names.contains(&"devkit_skill_list")); + assert!(names.contains(&"devkit_skill_search")); + assert!(names.contains(&"devkit_skill_run")); + assert!(names.contains(&"devkit_skill_discover")); + assert!(names.contains(&"devkit_skill_sync")); + assert!(names.contains(&"devkit_known_limit_store")); + assert!(names.contains(&"devkit_known_limit_list")); + assert!(names.contains(&"devkit_relation_store")); + assert!(names.contains(&"devkit_relation_query")); + assert!(names.contains(&"devkit_relation_delete")); + assert!(names.contains(&"devkit_workflow_list")); + assert!(names.contains(&"devkit_workflow_run")); + assert!(names.contains(&"devkit_workflow_status")); + for tool in tools { + assert!(tool.get("name").is_some()); + assert!(tool.get("description").is_some()); + assert!(tool.get("inputSchema").is_some()); + } +} + +#[tokio::test] +async fn test_tools_call_devkit_health() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 3, + "method": "tools/call", + "params": { + "name": "devkit_health", + "arguments": { "detail": false } + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + if parsed.get("success").unwrap() != &serde_json::Value::Bool(true) { + eprintln!( + "devkit_health returned error: {}", + serde_json::to_string_pretty(&parsed).unwrap() + ); + } + assert_eq!(parsed.get("success").unwrap(), true); + let summary = 
parsed.get("summary").unwrap(); + assert!(summary.get("total_repos").unwrap().as_i64().unwrap() >= 0); +} + +#[tokio::test] +async fn test_tools_call_devkit_query() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 4, + "method": "tools/call", + "params": { + "name": "devkit_query", + "arguments": { "expression": "lang:rust" } + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + assert!(parsed.get("count").unwrap().as_i64().unwrap() >= 0); +} + +#[tokio::test] +async fn test_tools_call_unknown_tool() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 5, + "method": "tools/call", + "params": { + "name": "unknown_tool", + "arguments": {} + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + assert!(resp.get("error").is_some()); + let error = resp.get("error").unwrap(); + assert_eq!(error.get("code").unwrap().as_i64().unwrap(), -32602); +} + +#[tokio::test] +async fn test_unknown_method() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 6, + "method": "unknown/method" + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + assert!(resp.get("error").is_some()); + let error = resp.get("error").unwrap(); + assert_eq!(error.get("code").unwrap().as_i64().unwrap(), -32601); +} + +#[tokio::test] +async fn test_tools_call_devkit_project_context() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 7, + "method": "tools/call", + "params": { + 
"name": "devkit_project_context", + "arguments": { "project": "nonexistent-project-xyz" } + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + assert_eq!(result.get("content").unwrap().as_array().unwrap().len(), 1); + let text = result["content"][0]["text"].as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + assert!(parsed.get("repo").unwrap().is_null()); + assert!(parsed.get("vault_notes").unwrap().as_array().unwrap().is_empty()); + assert!(parsed.get("assets").unwrap().as_array().unwrap().is_empty()); +} + +#[tokio::test] +async fn test_tools_call_devkit_query_repos() { + let server = build_server(); + let (mut ctx, _tmp) = test_ctx(); + + // 1. Empty registry returns empty results + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 10, + "method": "tools/call", + "params": { + "name": "devkit_query_repos", + "arguments": { "language": "" } + } + }); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + assert_eq!(parsed.get("count").unwrap().as_i64().unwrap(), 0); + + // 2. 
Seeded repo is returned with correct filtering + seed_repo(&ctx, "test-repo", "rust"); + let req2 = serde_json::json!({ + "jsonrpc": "2.0", + "id": 11, + "method": "tools/call", + "params": { + "name": "devkit_query_repos", + "arguments": { "language": "rust" } + } + }); + let resp2 = server.handle_request(req2, &mut ctx).await.unwrap(); + let result2 = resp2.get("result").unwrap(); + let content2 = result2.get("content").unwrap().as_array().unwrap(); + let text2 = content2[0].get("text").unwrap().as_str().unwrap(); + let parsed2: serde_json::Value = serde_json::from_str(text2).unwrap(); + assert_eq!(parsed2.get("success").unwrap(), true); + let repos = parsed2.get("repos").unwrap().as_array().unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0].get("id").unwrap().as_str().unwrap(), "test-repo"); + assert_eq!(repos[0].get("language").unwrap().as_str().unwrap(), "rust"); +} + +#[tokio::test] +async fn test_tools_call_devkit_vault_search() { + let server = build_server(); + let (mut ctx, _tmp) = test_ctx(); + + // Setup: create vault note and scan + let ws = ctx.storage.workspace_dir().unwrap(); + let vault_dir = ws.join("vault"); + std::fs::create_dir_all(&vault_dir).unwrap(); + std::fs::write( + vault_dir.join("test-note.md"), + "---\ntitle: Test Note\ntags: [test, vault]\n---\n\nThis is a test note for vault search.\n", + ).unwrap(); + let mut conn = ctx.conn().unwrap(); + crate::vault::scanner::scan_vault(&mut conn, Some(&vault_dir)).unwrap(); + + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 12, + "method": "tools/call", + "params": { + "name": "devkit_vault_search", + "arguments": { "query": "test note" } + } + }); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + 
assert_eq!(parsed.get("success").unwrap(), true); + let notes = parsed.get("notes").unwrap().as_array().unwrap(); + assert!(!notes.is_empty(), "vault_search should find the test-note"); + assert!( + notes + .iter() + .any(|n| n.get("title").and_then(|v| v.as_str()) == Some("Test Note")), + "vault_search should return Test Note" + ); +} + +#[tokio::test] +async fn test_tools_call_devkit_vault_read() { + let server = build_server(); + let (mut ctx, _tmp) = test_ctx(); + + // Setup: create vault note and scan + let ws = ctx.storage.workspace_dir().unwrap(); + let vault_dir = ws.join("vault"); + std::fs::create_dir_all(&vault_dir).unwrap(); + let note_path = vault_dir.join("test-read.md"); + std::fs::write( + ¬e_path, + "---\ntitle: Readable Note\ntags: [read]\n---\n\nContent body here.\n", + ) + .unwrap(); + let mut conn = ctx.conn().unwrap(); + crate::vault::scanner::scan_vault(&mut conn, Some(&vault_dir)).unwrap(); + + // 1. Read existing note by absolute path + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 13, + "method": "tools/call", + "params": { + "name": "devkit_vault_read", + "arguments": { "path": note_path.to_str().unwrap() } + } + }); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + assert_eq!(parsed.get("path").unwrap().as_str().unwrap(), note_path.to_str().unwrap()); + let frontmatter = parsed.get("frontmatter").unwrap().as_str().unwrap(); + assert!(frontmatter.contains("title: Readable Note")); + let body = parsed.get("content").unwrap().as_str().unwrap(); + assert!(body.contains("Content body here.")); + + // 2. 
Read non-existent note returns error + let req2 = serde_json::json!({ + "jsonrpc": "2.0", + "id": 14, + "method": "tools/call", + "params": { + "name": "devkit_vault_read", + "arguments": { "path": "/nonexistent/path/note.md" } + } + }); + let resp2 = server.handle_request(req2, &mut ctx).await.unwrap(); + let result2 = resp2.get("result").unwrap(); + assert_eq!(result2.get("isError").unwrap(), true); + let content2 = result2.get("content").unwrap().as_array().unwrap(); + let text2 = content2[0].get("text").unwrap().as_str().unwrap(); + let parsed2: serde_json::Value = serde_json::from_str(text2).unwrap(); + assert_eq!(parsed2.get("success").unwrap(), false); + assert!( + parsed2.get("error").unwrap().as_str().unwrap().contains("not found") + || parsed2.get("error").unwrap().as_str().unwrap().contains("unreadable") + ); +} + +#[tokio::test] +async fn test_tools_call_devkit_arxiv_fetch() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 8, + "method": "tools/call", + "params": { + "name": "devkit_arxiv_fetch", + "arguments": { "arxiv_id": "" } + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + // Empty arxiv_id should result in an error from the arXiv API or parser + assert_eq!(parsed.get("success").unwrap(), false); + assert!(!parsed.get("error").unwrap().as_str().unwrap().is_empty()); +} + +#[tokio::test] +async fn test_tools_call_devkit_status() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 15, + "method": "tools/call", + "params": { + "name": "devkit_status", + "arguments": {} + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut 
ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + // Empty registry → overall "fresh" (vacuous truth: all 0 repos are fresh) + assert_eq!(parsed.get("overall").unwrap().as_str().unwrap(), "fresh"); + let repos = parsed.get("repos").unwrap().as_array().unwrap(); + assert!(repos.is_empty()); +} + +#[tokio::test] +async fn test_tools_call_devkit_workflow_list() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 16, + "method": "tools/call", + "params": { + "name": "devkit_workflow_list", + "arguments": {} + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + assert_eq!(parsed.get("count").unwrap().as_i64().unwrap(), 0); + let workflows = parsed.get("workflows").unwrap().as_array().unwrap(); + assert!(workflows.is_empty()); +} + +#[tokio::test] +async fn test_tools_call_devkit_index() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 17, + "method": "tools/call", + "params": { + "name": "devkit_index", + "arguments": { "path": "" } + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = 
serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + // Empty registry → indexed 0 repos + assert_eq!(parsed.get("indexed").unwrap().as_i64().unwrap(), 0); +} + +#[tokio::test] +async fn test_tools_call_devkit_skill_list() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 9, + "method": "tools/call", + "params": { + "name": "devkit_skill_list", + "arguments": {} + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + assert!(parsed.get("skills").unwrap().is_array()); + assert!(parsed.get("count").unwrap().as_i64().unwrap() >= 0); +} + +#[tokio::test] +async fn test_tools_call_devkit_skill_search() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 10, + "method": "tools/call", + "params": { + "name": "devkit_skill_search", + "arguments": { "query": "report" } + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + assert!(parsed.get("skills").unwrap().is_array()); + assert!(parsed.get("count").unwrap().as_i64().unwrap() >= 0); +} + +#[tokio::test] +async fn test_tools_call_devkit_skill_discover() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 11, + "method": "tools/call", + "params": { + "name": 
"devkit_skill_discover", + "arguments": { + "path": ".", + "skill_id": "mcp-test-discover", + "dry_run": true + } + } + }); + // SAFETY: test-only env var mutation; test runner guarantees no concurrent + // reads of DEVBASE_MCP_ENABLE_DESTRUCTIVE in this process. + unsafe { + std::env::set_var("DEVBASE_MCP_ENABLE_DESTRUCTIVE", "1"); + } + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + assert!(!parsed.get("id").unwrap().as_str().unwrap().is_empty()); + assert!(!parsed.get("name").unwrap().as_str().unwrap().is_empty()); + assert!(parsed.get("version").unwrap().as_str().is_some()); + assert!(parsed.get("category").is_some()); +} + +#[test] +fn test_destructive_gate_disabled_by_default() { + // Ensure the variable is unset + // SAFETY: test-only env var mutation; no concurrent reads of this var. + unsafe { + std::env::remove_var("DEVBASE_MCP_ENABLE_DESTRUCTIVE"); + } + let result = crate::mcp::check_destructive_enabled(); + assert!(result.is_err()); + let msg = result.unwrap_err().to_string(); + assert!(msg.contains("DEVBASE_MCP_ENABLE_DESTRUCTIVE")); +} + +#[test] +fn test_destructive_gate_enabled() { + // SAFETY: test-only env var mutation; no concurrent reads of this var. + unsafe { + std::env::set_var("DEVBASE_MCP_ENABLE_DESTRUCTIVE", "1"); + } + let result = crate::mcp::check_destructive_enabled(); + assert!(result.is_ok()); + // Cleanup + // SAFETY: test-only env var mutation; no concurrent reads of this var. 
+ unsafe { + std::env::remove_var("DEVBASE_MCP_ENABLE_DESTRUCTIVE"); + } +} + +#[tokio::test] +#[ignore = "requires knowledge-report skill installed and may run external Python process"] +async fn test_tools_call_devkit_skill_run() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 11, + "method": "tools/call", + "params": { + "name": "devkit_skill_run", + "arguments": { + "skill_id": "knowledge-report", + "args": { "repo_id": "devbase" } + } + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + assert!(parsed.get("status").is_some()); + assert!(parsed.get("stdout").is_some()); +} + +#[tokio::test] +async fn test_stdio_content_length_format() { + let body = serde_json::json!({ "jsonrpc": "2.0", "id": 1, "result": {} }); + let msg = format_mcp_message(&body); + assert!(msg.starts_with("Content-Length: ")); + let parts: Vec<&str> = msg.split("\r\n\r\n").collect(); + assert_eq!(parts.len(), 2); + let body_part = parts[1]; + // No trailing newline — Content-Length must match exact body bytes + assert!(!body_part.ends_with("\n")); + let parsed: serde_json::Value = serde_json::from_str(body_part).unwrap(); + assert_eq!(parsed, body); + // Verify Content-Length header matches actual body byte count + let header = parts[0]; + let cl_str = header.strip_prefix("Content-Length: ").unwrap(); + let cl: usize = cl_str.parse().unwrap(); + assert_eq!(cl, body_part.len()); +} + +static NL_FILTER_TEST_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + +fn mock_repo( + id: &str, + language: Option<&str>, + tags: Vec<&str>, + stars: Option, +) -> crate::registry::RepoEntry { + 
crate::registry::RepoEntry { + id: id.to_string(), + local_path: std::path::PathBuf::from(format!("/tmp/{}", id)), + tags: tags.into_iter().map(String::from).collect(), + discovered_at: chrono::Utc::now(), + language: language.map(String::from), + workspace_type: "git".to_string(), + data_tier: "private".to_string(), + last_synced_at: None, + stars, + remotes: vec![], + } +} + +#[test] +fn test_nl_filter_repos_empty_query_returns_empty() -> anyhow::Result<()> { + let _guard = NL_FILTER_TEST_LOCK.lock().unwrap(); + let conn = crate::registry::WorkspaceRegistry::init_in_memory()?; + let repos: Vec = vec![]; + let backend = crate::storage::TempStorageBackend::new(); + let index_path = backend.index_path()?; + let searcher = crate::search::SearchClientImpl; + let analyzer = crate::health::RepoAnalyzerImpl; + let results = crate::mcp::tools::repo::nl_filter_repos_at( + &index_path, + "", + &repos, + &conn, + &searcher, + &analyzer, + )?; + assert!(results.is_empty()); + Ok(()) +} + +#[test] +fn test_nl_filter_repos_fallback_finds_by_language() -> anyhow::Result<()> { + let _guard = NL_FILTER_TEST_LOCK.lock().unwrap(); + let conn = crate::registry::WorkspaceRegistry::init_in_memory()?; + let repos = vec![ + mock_repo("repo1", Some("rust"), vec!["cli"], Some(10)), + mock_repo("repo2", Some("python"), vec!["web"], Some(5)), + ]; + let backend = crate::storage::TempStorageBackend::new(); + let index_path = backend.index_path()?; + let searcher = crate::search::SearchClientImpl; + let analyzer = crate::health::RepoAnalyzerImpl; + let results = crate::mcp::tools::repo::nl_filter_repos_at( + &index_path, + "rust cli tool", + &repos, + &conn, + &searcher, + &analyzer, + )?; + assert_eq!(results.len(), 1); + assert_eq!(results[0].id, "repo1"); + Ok(()) +} + +#[test] +fn test_nl_filter_repos_tantivy_finds_devbase() -> anyhow::Result<()> { + let backend = std::sync::Arc::new(crate::storage::TempStorageBackend::new()); + let index_path = backend.index_path()?; + + // Ensure DB 
schema exists + let conn = crate::registry::WorkspaceRegistry::init_db_with(&*backend)?; + + // Populate Tantivy index with devbase doc + let (index, _reader) = crate::search::init_index_at(&index_path)?; + let mut writer = crate::search::get_writer(&index)?; + let schema = index.schema(); + crate::search::add_repo_doc( + &mut writer, + &schema, + "devbase", + "devbase developer workspace manager", + "rust, cli, workspace, developer", + &["rust".to_string(), "cli".to_string()], + )?; + crate::search::commit_writer(&mut writer)?; + + let repos = vec![crate::registry::RepoEntry { + id: "devbase".to_string(), + local_path: std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")), + tags: vec!["rust".to_string(), "cli".to_string()], + discovered_at: chrono::Utc::now(), + language: Some("rust".to_string()), + workspace_type: "git".to_string(), + data_tier: "private".to_string(), + last_synced_at: None, + stars: Some(10), + remotes: vec![], + }]; + + let searcher = crate::search::SearchClientImpl; + let analyzer = crate::health::RepoAnalyzerImpl; + let results = crate::mcp::tools::repo::nl_filter_repos_at( + &index_path, + "developer workspace", + &repos, + &conn, + &searcher, + &analyzer, + )?; + assert!(!results.is_empty(), "tantivy path should find devbase"); + assert_eq!(results[0].id, "devbase"); + Ok(()) +} + +#[test] +fn test_format_mcp_message() { + let body = serde_json::json!({"jsonrpc": "2.0", "id": 1}); + let msg = format_mcp_message(&body); + assert!(msg.starts_with("Content-Length:")); + assert!(msg.contains("\r\n\r\n")); + // No trailing newline — spec-compliant MCP message ends after JSON body + assert!(!msg.ends_with("\n")); +} + +#[test] +fn test_parse_tool_tiers() { + let tiers = parse_tool_tiers("stable,beta"); + assert!(tiers.contains(&ToolTier::Stable)); + assert!(tiers.contains(&ToolTier::Beta)); + assert!(!tiers.contains(&ToolTier::Experimental)); +} + +#[test] +fn test_parse_tool_tiers_empty() { + let tiers = parse_tool_tiers(""); + 
assert!(tiers.is_empty()); +} + +// --- Claude Scenario Validation Tests --- + +fn seed_scenario_data(ctx: &crate::storage::AppContext) { + let mut conn = ctx.conn().unwrap(); + let now = chrono::Utc::now().to_rfc3339(); + + // 1. Register a repo in the entities table (single source of truth) + conn.execute( + "INSERT INTO entities (id, entity_type, name, local_path, metadata, created_at, updated_at, language, discovered_at, workspace_type, data_tier, stars) + VALUES (?1, 'repo', ?2, ?3, ?4, ?5, ?5, ?6, ?5, ?7, ?8, ?9)", + rusqlite::params!["scenario-repo", "scenario-repo", "/tmp/scenario-repo", "{}", &now, "rust", "git", "private", 42i64], + ).unwrap(); + + // 2. Tags (including "managed" for is_managed coverage) + for tag in &["rust", "cli", "managed"] { + conn.execute( + "INSERT INTO repo_tags (repo_id, tag) VALUES (?1, ?2)", + rusqlite::params!["scenario-repo", tag], + ) + .unwrap(); + } + + // 3. Code symbols: 10 entries, mix of functions and structs. + // Include auth-related signatures so "authentication flow" keyword search hits. 
+ let symbols: [(&str, &str, &str, i64, Option<&str>); 10] = [ + ( + "src/auth.rs", + "function", + "authenticate_user", + 10, + Some("pub fn authenticate_user(token: &str) // authentication flow handler"), + ), + ( + "src/auth.rs", + "function", + "validate_token", + 20, + Some("fn validate_token(t: &str) -> bool"), + ), + ( + "src/lib.rs", + "function", + "handle_error", + 30, + Some("pub fn handle_error(e: Error)"), + ), + ( + "src/lib.rs", + "function", + "parse_config", + 40, + Some("fn parse_config() -> Config"), + ), + ("src/main.rs", "function", "main", 1, Some("fn main()")), + ("src/lib.rs", "struct", "Config", 5, None), + ("src/models.rs", "struct", "User", 10, None), + ("src/models.rs", "function", "new_user", 15, Some("fn new_user() -> User")), + ("src/db.rs", "function", "connect_pool", 5, Some("fn connect_pool() -> Pool")), + ("src/api.rs", "function", "serve", 1, Some("pub async fn serve(addr: &str)")), + ]; + for (path, ty, name, line, sig) in &symbols { + conn.execute( + "INSERT INTO code_symbols (repo_id, file_path, symbol_type, name, line_start, signature) + VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["scenario-repo", path, ty, name, line, *sig], + ).unwrap(); + } + + // 4. 
Vault notes: create filesystem files then scan into registry + let ws = ctx.storage.workspace_dir().unwrap(); + let vault_dir = ws.join("vault"); + std::fs::create_dir_all(&vault_dir).unwrap(); + std::fs::write( + vault_dir.join("auth-design.md"), + "---\ntitle: Authentication Flow Design\nrepo: scenario-repo\ntags: [auth, design]\n---\n\nThis document describes the authentication flow for the scenario repo.\nThe authenticate_user function handles token validation.\n", + ).unwrap(); + crate::vault::scanner::scan_vault(&mut conn, Some(&vault_dir)).unwrap(); +} + +#[tokio::test] +async fn test_scenario_one_project_onboarding() { + let backend = std::sync::Arc::new(crate::storage::TempStorageBackend::new()); + let mut ctx = crate::storage::AppContext::with_storage(backend).unwrap(); + seed_scenario_data(&ctx); + + // Tool 1: devkit_health + let health_tool = DevkitHealthTool; + let health_result = health_tool + .invoke(serde_json::json!({ "detail": true }), &mut ctx) + .await + .unwrap(); + assert_eq!(health_result.get("success").unwrap(), true); + let summary = health_result.get("summary").unwrap(); + assert!(summary.get("total_repos").unwrap().as_i64().unwrap() >= 1); + + // Tool 2: devkit_project_brief + let brief_tool = DevkitProjectBriefTool; + let brief_result = brief_tool + .invoke(serde_json::json!({ "repo_id": "scenario-repo" }), &mut ctx) + .await + .unwrap(); + assert_eq!(brief_result.get("success").unwrap(), true); + let brief = brief_result.get("brief").unwrap().as_str().unwrap(); + // Acceptance: brief contains >= 5 key modules/symbols + let symbol_count = brief.matches("- `").count(); + assert!( + symbol_count >= 5, + "Expected >= 5 symbols in brief, found {}. 
Brief:\n{}", + symbol_count, + brief + ); + assert!(brief.contains("## Architecture")); + assert!(brief.contains("Key Symbols:")); + + // Tool 3: devkit_query_repos + let query_tool = DevkitQueryReposTool; + let query_result = query_tool.invoke(serde_json::json!({}), &mut ctx).await.unwrap(); + assert_eq!(query_result.get("success").unwrap(), true); + let repos = query_result.get("repos").unwrap().as_array().unwrap(); + assert!( + repos + .iter() + .any(|r| r.get("id").and_then(|v| v.as_str()) == Some("scenario-repo")), + "scenario-repo should be listed in query_repos" + ); +} + +#[tokio::test] +async fn test_tools_call_devkit_document_convert_not_found() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 18, + "method": "tools/call", + "params": { + "name": "devkit_document_convert", + "arguments": { "source_path": "/nonexistent/file.pdf" } + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), false); + let err = parsed.get("error").unwrap().as_str().unwrap(); + assert!(err.contains("not found") || err.contains("Source file")); +} + +#[tokio::test] +async fn test_scenario_two_semantic_exploration() { + let backend = std::sync::Arc::new(crate::storage::TempStorageBackend::new()); + let mut ctx = crate::storage::AppContext::with_storage(backend).unwrap(); + seed_scenario_data(&ctx); + + // Tool 1: devkit_hybrid_search — keyword fallback path (no embeddings seeded) + let search_tool = DevkitHybridSearchTool; + let search_result = search_tool + .invoke( + serde_json::json!({ "repo_id": "scenario-repo", "query_text": "authentication flow", "limit": 10 }), + &mut ctx, + ) + .await + 
.unwrap(); + assert_eq!(search_result.get("success").unwrap(), true); + let symbols = search_result.get("symbols").unwrap().as_array().unwrap(); + assert!( + !symbols.is_empty(), + "hybrid_search should return at least 1 auth-related symbol via keyword fallback" + ); + let names: Vec<&str> = + symbols.iter().filter_map(|s| s.get("name").and_then(|v| v.as_str())).collect(); + assert!( + names.contains(&"authenticate_user"), + "authenticate_user should appear in hybrid_search results for 'authentication flow'. Got: {:?}", + names + ); + + // Tool 2: devkit_project_context + let context_tool = DevkitProjectContextTool; + let ctx_result = context_tool + .invoke(serde_json::json!({ "project": "scenario-repo" }), &mut ctx) + .await + .unwrap(); + assert_eq!(ctx_result.get("success").unwrap(), true); + let ctx_symbols = ctx_result.get("symbols").unwrap().as_array().unwrap(); + assert!( + ctx_symbols.len() >= 3, + "project_context should return >= 3 symbols for understanding. Got: {}", + ctx_symbols.len() + ); + + // Tool 3: devkit_vault_search + let vault_tool = DevkitVaultSearchTool; + let vault_result = vault_tool + .invoke(serde_json::json!({ "query": "authentication" }), &mut ctx) + .await + .unwrap(); + assert_eq!(vault_result.get("success").unwrap(), true); + let notes = vault_result.get("notes").unwrap().as_array().unwrap(); + assert!(!notes.is_empty(), "vault_search should find the auth-design note"); + assert!( + notes + .iter() + .any(|n| n.get("title").and_then(|v| v.as_str()) == Some("Authentication Flow Design")), + "vault_search should return auth-design note" + ); +} diff --git a/src/mcp/tools/mod.rs b/src/mcp/tools/mod.rs index 492f24b..535261b 100644 --- a/src/mcp/tools/mod.rs +++ b/src/mcp/tools/mod.rs @@ -1,81 +1,83 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2026 juice094 -pub mod brief; -pub mod context; -pub mod document_convert; -pub mod evaluate; -pub mod impact; -pub mod index_health; -pub mod known_limit; -pub mod oplog; -pub mod query; 
-pub mod relations; -pub mod repo; -pub mod session; -pub mod skill; -pub mod skill_sync; -pub mod status; -pub mod vault; -pub mod workflow; - -pub mod code_analysis; -pub mod external; -pub mod knowledge; -pub mod search; - -pub use brief::*; -pub use context::*; -pub use document_convert::*; -pub use impact::*; -pub use index_health::*; -pub use known_limit::*; -pub use oplog::*; -pub use query::*; -pub use relations::*; -pub use repo::*; -pub use session::*; -pub use skill::*; -pub use skill_sync::*; -pub use status::*; -pub use vault::*; -pub use workflow::*; - -pub use code_analysis::*; -pub use evaluate::*; -pub use external::*; -pub use knowledge::*; -pub use search::*; - -#[cfg(test)] -mod tests { - #[test] - fn test_tool_modules_compile() { - // Smoke test: all tool structs are constructible - let _ = super::context::DevkitProjectContextTool; - let _ = super::known_limit::DevkitKnownLimitStoreTool; - let _ = super::oplog::DevkitOplogQueryTool; - let _ = super::query::DevkitQueryTool; - let _ = super::repo::DevkitIndexTool; - let _ = super::status::DevkitStatusTool; - let _ = super::skill::DevkitSkillListTool; - let _ = super::vault::DevkitVaultSearchTool; - let _ = super::vault::DevkitVaultDailyTool; - let _ = super::vault::DevkitVaultGraphTool; - let _ = super::workflow::DevkitWorkflowListTool; - let _ = super::session::DevkitSessionSaveTool; - let _ = super::session::DevkitSessionListTool; - let _ = super::session::DevkitSessionResumeTool; - let _ = super::session::DevkitSessionAttachTool; - let _ = super::session::DevkitSessionDetachTool; - let _ = super::session::DevkitSessionActivateTool; - let _ = super::session::DevkitSessionSearchTool; - let _ = super::session::DevkitSessionCaptureTool; - let _ = super::session::DevkitSessionWorkflowsTool; - let _ = super::session::DevkitSessionRecallTool; - let _ = super::session::DevkitSessionIndexTool; - let _ = super::session::DevkitSessionExportTool; - let _ = super::session::DevkitSessionImportTool; - let _ 
= super::brief::DevkitProjectBriefTool; - let _ = super::impact::DevkitImpactAnalysisTool; - } -} +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 +pub mod brief; +pub mod context; +pub mod document_convert; +pub mod evaluate; +pub mod impact; +pub mod index_health; +pub mod known_limit; +pub mod ontology_import; +pub mod oplog; +pub mod query; +pub mod relations; +pub mod repo; +pub mod session; +pub mod skill; +pub mod skill_sync; +pub mod status; +pub mod vault; +pub mod workflow; + +pub mod code_analysis; +pub mod external; +pub mod knowledge; +pub mod search; + +pub use brief::*; +pub use context::*; +pub use document_convert::*; +pub use impact::*; +pub use index_health::*; +pub use known_limit::*; +pub use ontology_import::*; +pub use oplog::*; +pub use query::*; +pub use relations::*; +pub use repo::*; +pub use session::*; +pub use skill::*; +pub use skill_sync::*; +pub use status::*; +pub use vault::*; +pub use workflow::*; + +pub use code_analysis::*; +pub use evaluate::*; +pub use external::*; +pub use knowledge::*; +pub use search::*; + +#[cfg(test)] +mod tests { + #[test] + fn test_tool_modules_compile() { + // Smoke test: all tool structs are constructible + let _ = super::context::DevkitProjectContextTool; + let _ = super::known_limit::DevkitKnownLimitStoreTool; + let _ = super::oplog::DevkitOplogQueryTool; + let _ = super::query::DevkitQueryTool; + let _ = super::repo::DevkitIndexTool; + let _ = super::status::DevkitStatusTool; + let _ = super::skill::DevkitSkillListTool; + let _ = super::vault::DevkitVaultSearchTool; + let _ = super::vault::DevkitVaultDailyTool; + let _ = super::vault::DevkitVaultGraphTool; + let _ = super::workflow::DevkitWorkflowListTool; + let _ = super::session::DevkitSessionSaveTool; + let _ = super::session::DevkitSessionListTool; + let _ = super::session::DevkitSessionResumeTool; + let _ = super::session::DevkitSessionAttachTool; + let _ = super::session::DevkitSessionDetachTool; + let _ = 
super::session::DevkitSessionActivateTool; + let _ = super::session::DevkitSessionSearchTool; + let _ = super::session::DevkitSessionCaptureTool; + let _ = super::session::DevkitSessionWorkflowsTool; + let _ = super::session::DevkitSessionRecallTool; + let _ = super::session::DevkitSessionIndexTool; + let _ = super::session::DevkitSessionExportTool; + let _ = super::session::DevkitSessionImportTool; + let _ = super::brief::DevkitProjectBriefTool; + let _ = super::impact::DevkitImpactAnalysisTool; + } +} diff --git a/src/mcp/tools/ontology_import.rs b/src/mcp/tools/ontology_import.rs new file mode 100644 index 0000000..23c674b --- /dev/null +++ b/src/mcp/tools/ontology_import.rs @@ -0,0 +1,69 @@ +use crate::mcp::McpTool; + +#[derive(Clone)] +pub struct DevkitOntologyImportTool; + +impl McpTool for DevkitOntologyImportTool { + fn name(&self) -> &'static str { + "devkit_ontology_import" + } + + fn schema(&self) -> serde_json::Value { + serde_json::json!({ + "description": r#"Import ontology entities and relations from an OpenClaw-compatible workspace into devbase. + +Reads ontology/entities/*.json and ontology/relations/*.jsonl from the specified workspace path +and maps them into devbase's entities and relations tables. + +Entity JSON format: { "entity_id": "...", "type": "...", "name": "...", "aliases": [...], ... } +Relation JSONL format: { "relation_id": "...", "type": "...", "from": "...", "to": "...", ... } + +Requires DEVBASE_MCP_ENABLE_DESTRUCTIVE=1 since this modifies the registry."#, + "inputSchema": { + "type": "object", + "properties": { + "workspace_path": { + "type": "string", + "description": "Path to the OpenClaw workspace root (contains ontology/ subdirectory). Defaults to the configured openclaw workspace." 
+ } + }, + "required": [] + } + }) + } + + async fn invoke( + &self, + args: serde_json::Value, + ctx: &mut crate::storage::AppContext, + ) -> anyhow::Result { + crate::mcp::check_destructive_enabled()?; + + let workspace = args + .get("workspace_path") + .and_then(|v| v.as_str()) + .map(std::path::PathBuf::from) + .unwrap_or_else(|| { + dirs::home_dir() + .unwrap_or_default() + .join(".kimi_openclaw") + .join("workspace") + }); + + let stats = { + let conn = ctx.conn()?; + crate::registry::import_ontology::import_ontology(&conn, &workspace)? + }; + + Ok(serde_json::json!({ + "workspace": workspace.to_string_lossy().to_string(), + "entities_added": stats.entities_added, + "entities_updated": stats.entities_updated, + "relations_added": stats.relations_added, + "relations_updated": stats.relations_updated, + "errors": stats.errors, + "total_entities": stats.entities_added + stats.entities_updated, + "total_relations": stats.relations_added + stats.relations_updated, + })) + } +} diff --git a/src/registry.rs b/src/registry.rs index 7c2916b..f332000 100644 --- a/src/registry.rs +++ b/src/registry.rs @@ -1,537 +1,538 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2026 juice094 -//! Registry layer: SQLite-backed entity storage and domain-specific submodules. -//! -//! Central types (`RepoEntry`, `VaultNote`, `PaperEntry`, etc.) and the -//! [`RegistryClient`] trait implementation on [`AppContext`]. -//! Submodules cover repos, health, knowledge, code metrics, call graphs, -//! dead-code analysis, and migrations. 
- -use crate::clients::RegistryClient; -use crate::storage::AppContext; -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; -use std::path::PathBuf; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RemoteEntry { - pub remote_name: String, - pub upstream_url: Option, - pub default_branch: Option, - pub last_sync: Option>, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RepoEntry { - pub id: String, - pub local_path: PathBuf, - pub tags: Vec, - pub discovered_at: DateTime, - pub language: Option, - pub workspace_type: String, - pub data_tier: String, - pub last_synced_at: Option>, - pub stars: Option, - pub remotes: Vec, -} - -/// Tags that mark a repository as "managed" for sync purposes. -/// Stored in the `repo_tags` table (not metadata) because tags are the -/// queryable, filterable dimension — metadata is for opaque JSON. -pub const MANAGED_TAGS: &[&str] = &[ - "mirror", - "reference", - "third-party", - "collaborative", - "team", - "own-project", - "tool", - "active", - "managed", -]; - -impl RepoEntry { - /// Return the 'origin' remote if present, otherwise the first remote. - pub fn primary_remote(&self) -> Option<&RemoteEntry> { - self.remotes - .iter() - .find(|r| r.remote_name == "origin") - .or_else(|| self.remotes.first()) - } - - /// Whether this repo is considered "managed" for sync/health automation. - /// Managed status is determined by the presence of any tag in [`MANAGED_TAGS`]. 
- pub fn is_managed(&self) -> bool { - self.tags.iter().any(|t| MANAGED_TAGS.contains(&t.as_str())) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct VaultNote { - pub id: String, - pub path: String, - pub title: Option, - pub content: String, - pub frontmatter: Option, - pub tags: Vec, - pub outgoing_links: Vec, - pub block_refs: Vec, - pub linked_repo: Option, - pub created_at: DateTime, - pub updated_at: DateTime, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PaperEntry { - pub id: String, - pub title: String, - pub authors: Option, - pub venue: Option, - pub year: Option, - pub pdf_path: Option, - pub bibtex: Option, - pub tags: Vec, - pub added_at: DateTime, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ExperimentEntry { - pub id: String, - pub repo_id: Option, - pub paper_id: Option, - pub config_json: Option, - pub result_path: Option, - pub git_commit: Option, - pub syncthing_folder_id: Option, - pub status: String, - pub timestamp: DateTime, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct WorkspaceRegistry { - pub version: String, - pub entries: Vec, -} - -impl Default for WorkspaceRegistry { - fn default() -> Self { - Self { - version: "0.1.0".to_string(), - entries: Vec::new(), - } - } -} - -pub use devbase_registry::health::HealthEntry; -pub use devbase_registry::metrics::CodeMetrics; -pub use devbase_registry::workspace::{OplogEntry, OplogEventType, WorkspaceSnapshot}; - -pub mod entity; -pub mod relation; - -// Backward-compatible re-exports (migrated to entity.rs in v0.15). 
-pub use entity::{ - ENTITY_TYPE_PAPER, ENTITY_TYPE_REPO, ENTITY_TYPE_SKILL, ENTITY_TYPE_VAULT_NOTE, - ENTITY_TYPE_WORKFLOW, upsert_entity, -}; - -pub mod agent_context; -pub mod call_graph; -pub mod code_symbols; -pub mod dead_code; -pub mod health; -pub mod knowledge; -pub mod knowledge_meta; -pub mod known_limits; -pub mod links; -pub mod metrics; -mod migrate; -pub mod migrations; -pub mod repo; -pub mod repos_toml; -pub mod vault; -pub mod workspace; - -impl RegistryClient for AppContext { - fn list_repos(&self, _filter: Option<&str>) -> anyhow::Result { - let conn = self.conn()?; - let repos = repo::list_repos(&conn)?; - let results: Vec = repos - .into_iter() - .map(|r| { - serde_json::json!({ - "id": r.id, - "local_path": r.local_path, - "language": r.language, - "tags": r.tags, - "workspace_type": r.workspace_type, - "data_tier": r.data_tier, - }) - }) - .collect(); - Ok(serde_json::json!({ "success": true, "count": results.len(), "repos": results })) - } - - fn get_repo(&self, repo_id: &str) -> anyhow::Result { - let conn = self.conn()?; - let repos = repo::list_repos(&conn)?; - match repos.into_iter().find(|r| r.id == repo_id) { - Some(r) => Ok(serde_json::json!({ - "success": true, - "id": r.id, - "local_path": r.local_path, - "language": r.language, - "tags": r.tags, - "workspace_type": r.workspace_type, - "data_tier": r.data_tier, - })), - None => Ok(serde_json::json!({ "success": false, "error": "repo not found" })), - } - } - - fn list_modules(&self, repo_id: &str) -> anyhow::Result { - let conn = self.conn()?; - let modules = knowledge::list_modules(&conn, repo_id)?; - let results: Vec = modules - .into_iter() - .map(|(name, ty, path)| { - serde_json::json!({ - "name": name, - "type": ty, - "path": path, - }) - }) - .collect(); - Ok(serde_json::json!({ "success": true, "count": results.len(), "modules": results })) - } - - fn save_paper(&self, paper: &serde_json::Value) -> anyhow::Result { - let conn = self.conn()?; - let paper_entry: PaperEntry = 
serde_json::from_value(paper.clone())?; - knowledge::save_paper(&conn, &paper_entry)?; - Ok(serde_json::json!({ "success": true })) - } - - fn save_experiment(&self, exp: &serde_json::Value) -> anyhow::Result { - let conn = self.conn()?; - let exp_entry: ExperimentEntry = serde_json::from_value(exp.clone())?; - WorkspaceRegistry::save_experiment(&conn, &exp_entry)?; - Ok(serde_json::json!({ "success": true })) - } - - fn list_code_metrics(&self) -> anyhow::Result { - let conn = self.conn()?; - let metrics = metrics::list_code_metrics(&conn)?; - let repos: Vec = metrics - .into_iter() - .map(|(id, m)| { - serde_json::json!({ - "repo_id": id, - "total_lines": m.total_lines, - "source_lines": m.source_lines, - "test_lines": m.test_lines, - "comment_lines": m.comment_lines, - "file_count": m.file_count, - "language_breakdown": m.language_breakdown, - "updated_at": m.updated_at.to_rfc3339() - }) - }) - .collect(); - Ok(serde_json::json!({ "success": true, "count": repos.len(), "repos": repos })) - } - - fn get_code_metrics(&self, repo_id: &str) -> anyhow::Result { - let conn = self.conn()?; - match metrics::get_code_metrics(&conn, repo_id)? { - Some(m) => Ok(serde_json::json!({ - "success": true, - "repo_id": repo_id, - "total_lines": m.total_lines, - "source_lines": m.source_lines, - "test_lines": m.test_lines, - "comment_lines": m.comment_lines, - "file_count": m.file_count, - "language_breakdown": m.language_breakdown, - "updated_at": m.updated_at.to_rfc3339() - })), - None => { - Ok(serde_json::json!({ "success": false, "error": "No metrics found for repo" })) - } - } - } - - fn get_health(&self, repo_id: &str) -> anyhow::Result { - let conn = self.conn()?; - match health::get_health(&conn, repo_id)? 
{ - Some(h) => Ok(serde_json::json!({ - "success": true, - "repo_id": repo_id, - "status": h.status, - "ahead": h.ahead, - "behind": h.behind, - "checked_at": h.checked_at.to_rfc3339() - })), - None => Ok(serde_json::json!({ "success": false, "error": "No health data found" })), - } - } - - fn query_call_graph( - &self, - repo_id: &str, - callee: Option<&str>, - caller: Option<&str>, - file: Option<&str>, - limit: usize, - ) -> anyhow::Result { - let conn = self.conn()?; - let edges = call_graph::query_call_edges( - &conn, - repo_id, - callee.filter(|s| !s.is_empty()), - caller.filter(|s| !s.is_empty()), - file.filter(|s| !s.is_empty()), - limit, - )?; - let calls: Vec = edges - .into_iter() - .map(|e| { - serde_json::json!({ - "caller_file": e.caller_file, - "caller_symbol": e.caller_symbol, - "caller_line": e.caller_line, - "callee_name": e.callee_name, - }) - }) - .collect(); - Ok(serde_json::json!({ - "success": true, - "repo_id": repo_id, - "count": calls.len(), - "calls": calls - })) - } - - fn query_dependencies( - &self, - repo_id: &str, - direction: &str, - relation_type: Option<&str>, - ) -> anyhow::Result { - let conn = self.conn()?; - let rel_filter = relation_type.filter(|s| !s.is_empty()); - let label = if direction == "incoming" || direction == "reverse" { - "reverse dependencies" - } else { - "dependencies" - }; - let rows = if direction == "incoming" || direction == "reverse" { - crate::dependency_graph::list_reverse_dependencies(&conn, repo_id)? - } else { - crate::dependency_graph::list_dependencies(&conn, repo_id)? 
- }; - let deps: Vec = rows - .into_iter() - .filter(|(_, rel, _)| rel_filter.is_none_or(|f| f == rel)) - .map(|(id, rel, conf)| { - serde_json::json!({ - "repo_id": id, - "relation_type": rel, - "confidence": conf, - }) - }) - .collect(); - Ok(serde_json::json!({ - "success": true, - "repo_id": repo_id, - "direction": direction, - "label": label, - "count": deps.len(), - "dependencies": deps - })) - } - - fn query_code_symbols( - &self, - repo_id: &str, - name: Option<&str>, - symbol_type: Option<&str>, - file: Option<&str>, - limit: usize, - ) -> anyhow::Result { - let conn = self.conn()?; - let symbols = - code_symbols::query_code_symbols(&conn, repo_id, name, symbol_type, file, limit)?; - let out: Vec = symbols - .iter() - .map(|s| { - serde_json::json!({ - "file_path": s.file_path, - "symbol_type": s.symbol_type, - "name": s.name, - "line_start": s.line_start, - "line_end": s.line_end, - "signature": s.signature, - }) - }) - .collect(); - Ok(serde_json::json!({ - "success": true, - "repo_id": repo_id, - "count": out.len(), - "symbols": out - })) - } - - fn query_dead_code( - &self, - repo_id: &str, - include_pub: bool, - limit: usize, - ) -> anyhow::Result { - let conn = self.conn()?; - let dead = dead_code::query_dead_code(&conn, repo_id, include_pub, limit)?; - let out: Vec = dead - .iter() - .map(|d| { - serde_json::json!({ - "file_path": d.file_path, - "name": d.name, - "line_start": d.line_start, - "signature": d.signature, - }) - }) - .collect(); - Ok(serde_json::json!({ - "success": true, - "repo_id": repo_id, - "count": out.len(), - "dead_functions": out - })) - } - - fn save_relation( - &self, - from: &str, - to: &str, - relation_type: &str, - confidence: f64, - ) -> anyhow::Result { - let conn = self.conn()?; - relation::save_relation(&conn, from, to, relation_type, confidence)?; - Ok(serde_json::json!({ "success": true })) - } - - fn query_relations( - &self, - entity_id: &str, - direction: &str, - relation_type: Option<&str>, - ) -> anyhow::Result 
{ - let conn = self.conn()?; - let results = match direction { - "bidirectional" => { - let rows = relation::find_related_entities(&conn, entity_id, relation_type)?; - rows.into_iter() - .map(|(from, to, rt, conf, created)| { - serde_json::json!({ - "from_entity_id": from, - "to_entity_id": to, - "relation_type": rt, - "confidence": conf, - "created_at": created - }) - }) - .collect::>() - } - "incoming" => { - let mut stmt = conn.prepare( - "SELECT from_entity_id, relation_type, confidence, created_at FROM relations - WHERE to_entity_id = ?1 - ORDER BY confidence DESC", - )?; - let rows = stmt.query_map([entity_id], |row| { - Ok(( - row.get::<_, String>(0)?, - row.get::<_, String>(1)?, - row.get::<_, f64>(2)?, - row.get::<_, String>(3)?, - )) - })?; - let filtered: Vec<_> = if let Some(rt) = relation_type.filter(|s| !s.is_empty()) { - rows.filter(|r| r.as_ref().map(|(_, t, _, _)| t == rt).unwrap_or(false)) - .collect::, _>>()? - } else { - rows.collect::, _>>()? - }; - filtered - .into_iter() - .map(|(from, rt, conf, created)| { - serde_json::json!({ - "from_entity_id": from, - "relation_type": rt, - "confidence": conf, - "created_at": created - }) - }) - .collect::>() - } - _ => { - let rows = relation::list_relations(&conn, entity_id, relation_type)?; - rows.into_iter() - .map(|(to, rt, conf, created)| { - serde_json::json!({ - "to_entity_id": to, - "relation_type": rt, - "confidence": conf, - "created_at": created - }) - }) - .collect::>() - } - }; - Ok(serde_json::json!({ "success": true, "relations": results })) - } - - fn delete_relations( - &self, - from: &str, - to: &str, - relation_type: Option<&str>, - ) -> anyhow::Result { - let conn = self.conn()?; - let count = match relation_type.filter(|s| !s.is_empty()) { - Some(rt) => conn.execute( - "DELETE FROM relations WHERE from_entity_id = ?1 AND to_entity_id = ?2 AND relation_type = ?3", - rusqlite::params![from, to, rt], - )?, - None => conn.execute( - "DELETE FROM relations WHERE from_entity_id = ?1 AND 
to_entity_id = ?2", - rusqlite::params![from, to], - )?, - }; - Ok(serde_json::json!({ "success": true, "deleted": count })) - } - - fn list_vault_notes(&self) -> anyhow::Result { - let conn = self.conn()?; - let notes = vault::list_vault_notes(&conn)?; - let results: Vec = notes - .into_iter() - .map(|n| { - serde_json::json!({ - "id": n.id, - "path": n.path, - "title": n.title, - "tags": n.tags, - }) - }) - .collect(); - Ok(serde_json::json!({ "success": true, "count": results.len(), "notes": results })) - } -} - -#[cfg(test)] -pub mod test_helpers; - -#[cfg(test)] -mod tests; +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 +//! Registry layer: SQLite-backed entity storage and domain-specific submodules. +//! +//! Central types (`RepoEntry`, `VaultNote`, `PaperEntry`, etc.) and the +//! [`RegistryClient`] trait implementation on [`AppContext`]. +//! Submodules cover repos, health, knowledge, code metrics, call graphs, +//! dead-code analysis, and migrations. + +use crate::clients::RegistryClient; +use crate::storage::AppContext; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RemoteEntry { + pub remote_name: String, + pub upstream_url: Option, + pub default_branch: Option, + pub last_sync: Option>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RepoEntry { + pub id: String, + pub local_path: PathBuf, + pub tags: Vec, + pub discovered_at: DateTime, + pub language: Option, + pub workspace_type: String, + pub data_tier: String, + pub last_synced_at: Option>, + pub stars: Option, + pub remotes: Vec, +} + +/// Tags that mark a repository as "managed" for sync purposes. +/// Stored in the `repo_tags` table (not metadata) because tags are the +/// queryable, filterable dimension — metadata is for opaque JSON. 
+pub const MANAGED_TAGS: &[&str] = &[ + "mirror", + "reference", + "third-party", + "collaborative", + "team", + "own-project", + "tool", + "active", + "managed", +]; + +impl RepoEntry { + /// Return the 'origin' remote if present, otherwise the first remote. + pub fn primary_remote(&self) -> Option<&RemoteEntry> { + self.remotes + .iter() + .find(|r| r.remote_name == "origin") + .or_else(|| self.remotes.first()) + } + + /// Whether this repo is considered "managed" for sync/health automation. + /// Managed status is determined by the presence of any tag in [`MANAGED_TAGS`]. + pub fn is_managed(&self) -> bool { + self.tags.iter().any(|t| MANAGED_TAGS.contains(&t.as_str())) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VaultNote { + pub id: String, + pub path: String, + pub title: Option, + pub content: String, + pub frontmatter: Option, + pub tags: Vec, + pub outgoing_links: Vec, + pub block_refs: Vec, + pub linked_repo: Option, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PaperEntry { + pub id: String, + pub title: String, + pub authors: Option, + pub venue: Option, + pub year: Option, + pub pdf_path: Option, + pub bibtex: Option, + pub tags: Vec, + pub added_at: DateTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ExperimentEntry { + pub id: String, + pub repo_id: Option, + pub paper_id: Option, + pub config_json: Option, + pub result_path: Option, + pub git_commit: Option, + pub syncthing_folder_id: Option, + pub status: String, + pub timestamp: DateTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WorkspaceRegistry { + pub version: String, + pub entries: Vec, +} + +impl Default for WorkspaceRegistry { + fn default() -> Self { + Self { + version: "0.1.0".to_string(), + entries: Vec::new(), + } + } +} + +pub use devbase_registry::health::HealthEntry; +pub use devbase_registry::metrics::CodeMetrics; +pub use 
devbase_registry::workspace::{OplogEntry, OplogEventType, WorkspaceSnapshot}; + +pub mod entity; +pub mod relation; + +// Backward-compatible re-exports (migrated to entity.rs in v0.15). +pub use entity::{ + ENTITY_TYPE_PAPER, ENTITY_TYPE_REPO, ENTITY_TYPE_SKILL, ENTITY_TYPE_VAULT_NOTE, + ENTITY_TYPE_WORKFLOW, upsert_entity, +}; + +pub mod agent_context; +pub mod call_graph; +pub mod code_symbols; +pub mod dead_code; +pub mod health; +pub mod import_ontology; +pub mod knowledge; +pub mod knowledge_meta; +pub mod known_limits; +pub mod links; +pub mod metrics; +mod migrate; +pub mod migrations; +pub mod repo; +pub mod repos_toml; +pub mod vault; +pub mod workspace; + +impl RegistryClient for AppContext { + fn list_repos(&self, _filter: Option<&str>) -> anyhow::Result { + let conn = self.conn()?; + let repos = repo::list_repos(&conn)?; + let results: Vec = repos + .into_iter() + .map(|r| { + serde_json::json!({ + "id": r.id, + "local_path": r.local_path, + "language": r.language, + "tags": r.tags, + "workspace_type": r.workspace_type, + "data_tier": r.data_tier, + }) + }) + .collect(); + Ok(serde_json::json!({ "success": true, "count": results.len(), "repos": results })) + } + + fn get_repo(&self, repo_id: &str) -> anyhow::Result { + let conn = self.conn()?; + let repos = repo::list_repos(&conn)?; + match repos.into_iter().find(|r| r.id == repo_id) { + Some(r) => Ok(serde_json::json!({ + "success": true, + "id": r.id, + "local_path": r.local_path, + "language": r.language, + "tags": r.tags, + "workspace_type": r.workspace_type, + "data_tier": r.data_tier, + })), + None => Ok(serde_json::json!({ "success": false, "error": "repo not found" })), + } + } + + fn list_modules(&self, repo_id: &str) -> anyhow::Result { + let conn = self.conn()?; + let modules = knowledge::list_modules(&conn, repo_id)?; + let results: Vec = modules + .into_iter() + .map(|(name, ty, path)| { + serde_json::json!({ + "name": name, + "type": ty, + "path": path, + }) + }) + .collect(); + 
Ok(serde_json::json!({ "success": true, "count": results.len(), "modules": results })) + } + + fn save_paper(&self, paper: &serde_json::Value) -> anyhow::Result { + let conn = self.conn()?; + let paper_entry: PaperEntry = serde_json::from_value(paper.clone())?; + knowledge::save_paper(&conn, &paper_entry)?; + Ok(serde_json::json!({ "success": true })) + } + + fn save_experiment(&self, exp: &serde_json::Value) -> anyhow::Result { + let conn = self.conn()?; + let exp_entry: ExperimentEntry = serde_json::from_value(exp.clone())?; + WorkspaceRegistry::save_experiment(&conn, &exp_entry)?; + Ok(serde_json::json!({ "success": true })) + } + + fn list_code_metrics(&self) -> anyhow::Result { + let conn = self.conn()?; + let metrics = metrics::list_code_metrics(&conn)?; + let repos: Vec = metrics + .into_iter() + .map(|(id, m)| { + serde_json::json!({ + "repo_id": id, + "total_lines": m.total_lines, + "source_lines": m.source_lines, + "test_lines": m.test_lines, + "comment_lines": m.comment_lines, + "file_count": m.file_count, + "language_breakdown": m.language_breakdown, + "updated_at": m.updated_at.to_rfc3339() + }) + }) + .collect(); + Ok(serde_json::json!({ "success": true, "count": repos.len(), "repos": repos })) + } + + fn get_code_metrics(&self, repo_id: &str) -> anyhow::Result { + let conn = self.conn()?; + match metrics::get_code_metrics(&conn, repo_id)? { + Some(m) => Ok(serde_json::json!({ + "success": true, + "repo_id": repo_id, + "total_lines": m.total_lines, + "source_lines": m.source_lines, + "test_lines": m.test_lines, + "comment_lines": m.comment_lines, + "file_count": m.file_count, + "language_breakdown": m.language_breakdown, + "updated_at": m.updated_at.to_rfc3339() + })), + None => { + Ok(serde_json::json!({ "success": false, "error": "No metrics found for repo" })) + } + } + } + + fn get_health(&self, repo_id: &str) -> anyhow::Result { + let conn = self.conn()?; + match health::get_health(&conn, repo_id)? 
{ + Some(h) => Ok(serde_json::json!({ + "success": true, + "repo_id": repo_id, + "status": h.status, + "ahead": h.ahead, + "behind": h.behind, + "checked_at": h.checked_at.to_rfc3339() + })), + None => Ok(serde_json::json!({ "success": false, "error": "No health data found" })), + } + } + + fn query_call_graph( + &self, + repo_id: &str, + callee: Option<&str>, + caller: Option<&str>, + file: Option<&str>, + limit: usize, + ) -> anyhow::Result { + let conn = self.conn()?; + let edges = call_graph::query_call_edges( + &conn, + repo_id, + callee.filter(|s| !s.is_empty()), + caller.filter(|s| !s.is_empty()), + file.filter(|s| !s.is_empty()), + limit, + )?; + let calls: Vec = edges + .into_iter() + .map(|e| { + serde_json::json!({ + "caller_file": e.caller_file, + "caller_symbol": e.caller_symbol, + "caller_line": e.caller_line, + "callee_name": e.callee_name, + }) + }) + .collect(); + Ok(serde_json::json!({ + "success": true, + "repo_id": repo_id, + "count": calls.len(), + "calls": calls + })) + } + + fn query_dependencies( + &self, + repo_id: &str, + direction: &str, + relation_type: Option<&str>, + ) -> anyhow::Result { + let conn = self.conn()?; + let rel_filter = relation_type.filter(|s| !s.is_empty()); + let label = if direction == "incoming" || direction == "reverse" { + "reverse dependencies" + } else { + "dependencies" + }; + let rows = if direction == "incoming" || direction == "reverse" { + crate::dependency_graph::list_reverse_dependencies(&conn, repo_id)? + } else { + crate::dependency_graph::list_dependencies(&conn, repo_id)? 
+ }; + let deps: Vec = rows + .into_iter() + .filter(|(_, rel, _)| rel_filter.is_none_or(|f| f == rel)) + .map(|(id, rel, conf)| { + serde_json::json!({ + "repo_id": id, + "relation_type": rel, + "confidence": conf, + }) + }) + .collect(); + Ok(serde_json::json!({ + "success": true, + "repo_id": repo_id, + "direction": direction, + "label": label, + "count": deps.len(), + "dependencies": deps + })) + } + + fn query_code_symbols( + &self, + repo_id: &str, + name: Option<&str>, + symbol_type: Option<&str>, + file: Option<&str>, + limit: usize, + ) -> anyhow::Result { + let conn = self.conn()?; + let symbols = + code_symbols::query_code_symbols(&conn, repo_id, name, symbol_type, file, limit)?; + let out: Vec = symbols + .iter() + .map(|s| { + serde_json::json!({ + "file_path": s.file_path, + "symbol_type": s.symbol_type, + "name": s.name, + "line_start": s.line_start, + "line_end": s.line_end, + "signature": s.signature, + }) + }) + .collect(); + Ok(serde_json::json!({ + "success": true, + "repo_id": repo_id, + "count": out.len(), + "symbols": out + })) + } + + fn query_dead_code( + &self, + repo_id: &str, + include_pub: bool, + limit: usize, + ) -> anyhow::Result { + let conn = self.conn()?; + let dead = dead_code::query_dead_code(&conn, repo_id, include_pub, limit)?; + let out: Vec = dead + .iter() + .map(|d| { + serde_json::json!({ + "file_path": d.file_path, + "name": d.name, + "line_start": d.line_start, + "signature": d.signature, + }) + }) + .collect(); + Ok(serde_json::json!({ + "success": true, + "repo_id": repo_id, + "count": out.len(), + "dead_functions": out + })) + } + + fn save_relation( + &self, + from: &str, + to: &str, + relation_type: &str, + confidence: f64, + ) -> anyhow::Result { + let conn = self.conn()?; + relation::save_relation(&conn, from, to, relation_type, confidence)?; + Ok(serde_json::json!({ "success": true })) + } + + fn query_relations( + &self, + entity_id: &str, + direction: &str, + relation_type: Option<&str>, + ) -> anyhow::Result 
{ + let conn = self.conn()?; + let results = match direction { + "bidirectional" => { + let rows = relation::find_related_entities(&conn, entity_id, relation_type)?; + rows.into_iter() + .map(|(from, to, rt, conf, created)| { + serde_json::json!({ + "from_entity_id": from, + "to_entity_id": to, + "relation_type": rt, + "confidence": conf, + "created_at": created + }) + }) + .collect::>() + } + "incoming" => { + let mut stmt = conn.prepare( + "SELECT from_entity_id, relation_type, confidence, created_at FROM relations + WHERE to_entity_id = ?1 + ORDER BY confidence DESC", + )?; + let rows = stmt.query_map([entity_id], |row| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, String>(1)?, + row.get::<_, f64>(2)?, + row.get::<_, String>(3)?, + )) + })?; + let filtered: Vec<_> = if let Some(rt) = relation_type.filter(|s| !s.is_empty()) { + rows.filter(|r| r.as_ref().map(|(_, t, _, _)| t == rt).unwrap_or(false)) + .collect::, _>>()? + } else { + rows.collect::, _>>()? + }; + filtered + .into_iter() + .map(|(from, rt, conf, created)| { + serde_json::json!({ + "from_entity_id": from, + "relation_type": rt, + "confidence": conf, + "created_at": created + }) + }) + .collect::>() + } + _ => { + let rows = relation::list_relations(&conn, entity_id, relation_type)?; + rows.into_iter() + .map(|(to, rt, conf, created)| { + serde_json::json!({ + "to_entity_id": to, + "relation_type": rt, + "confidence": conf, + "created_at": created + }) + }) + .collect::>() + } + }; + Ok(serde_json::json!({ "success": true, "relations": results })) + } + + fn delete_relations( + &self, + from: &str, + to: &str, + relation_type: Option<&str>, + ) -> anyhow::Result { + let conn = self.conn()?; + let count = match relation_type.filter(|s| !s.is_empty()) { + Some(rt) => conn.execute( + "DELETE FROM relations WHERE from_entity_id = ?1 AND to_entity_id = ?2 AND relation_type = ?3", + rusqlite::params![from, to, rt], + )?, + None => conn.execute( + "DELETE FROM relations WHERE from_entity_id = ?1 AND 
to_entity_id = ?2", + rusqlite::params![from, to], + )?, + }; + Ok(serde_json::json!({ "success": true, "deleted": count })) + } + + fn list_vault_notes(&self) -> anyhow::Result { + let conn = self.conn()?; + let notes = vault::list_vault_notes(&conn)?; + let results: Vec = notes + .into_iter() + .map(|n| { + serde_json::json!({ + "id": n.id, + "path": n.path, + "title": n.title, + "tags": n.tags, + }) + }) + .collect(); + Ok(serde_json::json!({ "success": true, "count": results.len(), "notes": results })) + } +} + +#[cfg(test)] +pub mod test_helpers; + +#[cfg(test)] +mod tests; diff --git a/src/registry/import_ontology.rs b/src/registry/import_ontology.rs new file mode 100644 index 0000000..37102c9 --- /dev/null +++ b/src/registry/import_ontology.rs @@ -0,0 +1,197 @@ +use std::path::Path; + +use rusqlite::Connection; + +type Result = std::result::Result; + +/// Statistics from an ontology import run. +#[derive(Debug, Clone, Default)] +pub struct OntologyImportStats { + pub entities_added: usize, + pub entities_updated: usize, + pub relations_added: usize, + pub relations_updated: usize, + pub errors: Vec, +} + +/// Import ontology entities and relations from an OpenClaw-compatible workspace. +pub fn import_ontology( + conn: &Connection, + workspace_path: &Path, +) -> Result { + let entities_dir = workspace_path.join("ontology").join("entities"); + let relations_file = workspace_path.join("ontology").join("relations").join("core-relations.jsonl"); + + let mut stats = OntologyImportStats::default(); + + // Phase 1: Import entities + if entities_dir.is_dir() { + for entry in std::fs::read_dir(&entities_dir)? 
{ + let entry = entry?; + let path = entry.path(); + if path.extension().map_or(false, |e| e == "json") { + match import_entity_file(conn, &path) { + Ok((added, updated)) => { + stats.entities_added += added; + stats.entities_updated += updated; + } + Err(e) => { + stats.errors.push(format!("{}: {}", path.display(), e)); + } + } + } + } + } + + // Phase 2: Import relations + if relations_file.exists() { + match import_relations_file(conn, &relations_file) { + Ok((added, updated)) => { + stats.relations_added += added; + stats.relations_updated += updated; + } + Err(e) => { + stats.errors.push(format!("{}: {}", relations_file.display(), e)); + } + } + } + + Ok(stats) +} + +fn import_entity_file(conn: &Connection, path: &Path) -> Result<(usize, usize)> { + let content = std::fs::read_to_string(path)?; + let entity: serde_json::Value = serde_json::from_str(&content)?; + + let entity_id = entity["entity_id"].as_str().unwrap_or("unknown"); + let entity_type = entity["type"].as_str().unwrap_or("ontology_node"); + let name = entity["name"].as_str().unwrap_or(entity_id); + + // Ensure entity type exists + conn.execute( + "INSERT OR IGNORE INTO entity_types (name, schema_json, description, created_at) VALUES (?1, '{}', ?2, datetime('now'))", + rusqlite::params![entity_type, format!("Ontology entity type: {}", entity_type)], + )?; + + let metadata = serde_json::to_string(&entity).unwrap_or_default(); + let now = chrono::Utc::now().to_rfc3339(); + let existing: Option = conn + .query_row( + "SELECT id FROM entities WHERE id = ?1", + rusqlite::params![entity_id], + |row| row.get(0), + ) + .ok(); + + conn.execute( + "INSERT INTO entities (id, entity_type, name, metadata, created_at, updated_at) + VALUES (?1, ?2, ?3, ?4, ?5, ?5) + ON CONFLICT(id) DO UPDATE SET + entity_type = excluded.entity_type, + name = excluded.name, + metadata = excluded.metadata, + updated_at = excluded.updated_at", + rusqlite::params![entity_id, entity_type, name, metadata, now], + )?; + + if 
existing.is_some() { + Ok((0, 1)) + } else { + Ok((1, 0)) + } +} + +fn import_relations_file(conn: &Connection, path: &Path) -> Result<(usize, usize)> { + let content = std::fs::read_to_string(path)?; + let mut added = 0usize; + let mut updated = 0usize; + + for line in content.lines() { + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + + let rel: serde_json::Value = match serde_json::from_str(trimmed) { + Ok(v) => v, + Err(e) => { + tracing::warn!("Skipping malformed relation line: {} ({})", trimmed, e); + continue; + } + }; + + let relation_id = rel["relation_id"].as_str().unwrap_or(""); + let rel_type = rel["type"].as_str().unwrap_or("unknown"); + let from_id = rel["from"].as_str().unwrap_or(""); + let to_id = rel["to"].as_str().unwrap_or(""); + + if relation_id.is_empty() || from_id.is_empty() || to_id.is_empty() { + continue; + } + + let metadata = serde_json::to_string(&rel).unwrap_or_default(); + let now = chrono::Utc::now().to_rfc3339(); + let exists = conn + .query_row( + "SELECT id FROM relations WHERE from_entity_id = ?1 AND to_entity_id = ?2 AND relation_type = ?3", + rusqlite::params![from_id, to_id, rel_type], + |row| row.get::<_, String>(0), + ) + .is_ok(); + + if exists { + updated += 1; + } else { + added += 1; + } + + conn.execute( + "INSERT OR REPLACE INTO relations (id, from_entity_id, to_entity_id, relation_type, metadata, confidence, created_at) + VALUES (?1, ?2, ?3, ?4, ?5, 1.0, ?6)", + rusqlite::params![relation_id, from_id, to_id, rel_type, metadata, now], + )?; + } + + Ok((added, updated)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::registry::WorkspaceRegistry; + + #[test] + fn test_import_ontology_from_temp() { + let tmp = std::env::temp_dir().join(format!("devbase_onto_{}", std::process::id())); + let entities_dir = tmp.join("ontology").join("entities"); + let relations_dir = tmp.join("ontology").join("relations"); + std::fs::create_dir_all(&entities_dir).unwrap(); + 
std::fs::create_dir_all(&relations_dir).unwrap(); + + std::fs::write( + entities_dir.join("person-a.json"), + r#"{"entity_id":"person-a","type":"person","name":"Alpha","aliases":["a"]}"#, + ) + .unwrap(); + std::fs::write( + entities_dir.join("person-b.json"), + r#"{"entity_id":"person-b","type":"person","name":"Beta","aliases":["b"]}"#, + ) + .unwrap(); + std::fs::write( + relations_dir.join("core-relations.jsonl"), + r#"{"relation_id":"r-test","type":"knows","from":"person-a","to":"person-b"} +{"relation_id":"r-test2","type":"collaborates","from":"person-b","to":"person-a"}"#, + ) + .unwrap(); + + let conn = WorkspaceRegistry::init_in_memory().unwrap(); + let stats = import_ontology(&conn, &tmp).unwrap(); + + assert_eq!(stats.entities_added, 2); + assert_eq!(stats.relations_added, 2); + assert!(stats.errors.is_empty()); + + std::fs::remove_dir_all(&tmp).unwrap(); + } +} From 3142a56d05a0e548293ef2fc444991bf48ef2463 Mon Sep 17 00:00:00 2001 From: juice094 Date: Sat, 6 Jun 2026 21:26:53 +0800 Subject: [PATCH 06/11] Update CLAUDE.md: 71 MCP tools, add ontology import Co-Authored-By: Claude Opus 4.7 --- .claude/CLAUDE.md | 186 +++++++++++++++++++++++----------------------- 1 file changed, 93 insertions(+), 93 deletions(-) diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 06586b4..3aa0163 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -1,93 +1,93 @@ -# devbase — Cognitive Anchor - -> **Purpose**: This file is designed to survive context compression. It contains -> immutable facts and current state that every AI session must know before -> working on this project. If you are reading this after a context reset, -> treat this as your primary source of truth. 
- ---- - -## Immutable Facts(不可变事实) - -| ID | Fact | Source | Status | -|----|------|--------|--------| -| F-001 | Version | `Cargo.toml` | **v0.20.1** | -| F-002 | Edition | `Cargo.toml` | **Rust 2024** | -| F-003 | Test Coverage | CI | **494 passed, 0 failed, 5 ignored** | -| F-004 | Production Unwrap | Architecture Invariants | **0** (G5 rule enforced) | -| F-005 | MCP Tools | `src/mcp/mod.rs` | **70** (5 Stable / 61 Beta / 4 Experimental) | -| F-006 | Schema Version | `registry/migrate.rs` | **v36** | -| F-007 | Entities Table | Schema v21+ | **唯一真相源** (`repos` 表已删除) | -| F-008 | SQLite Mode | `storage.rs` | **WAL mode** | -| F-009 | Clippy | CI | **`-D warnings` 全绿** | -| F-010 | Release Assets | GitHub Releases | **Linux + Windows x64** 预编译二进制 | - -## 架构红线(Architecture Guardrails) - -- **RF-1**: 无裸 `init_db()` 调用,全部使用 `StorageBackend` 注入 -- **RF-2**: `TempStorageBackend` 用于测试隔离(禁止 `DEVBASE_DATA_DIR` 竞态) -- **RF-3**: `entities` 表是唯一真相源 -- **RF-4**: 二进制上下文 ≤ 1MB -- **RF-5**: 模块间无循环依赖 -- **RF-6**: 生产代码零 `unwrap`/`expect`/`panic`(测试除外) -- **RF-7**: 路径输出必须脱敏(`sanitize_path()` 掩码 home 目录) - -## 当前上下文(Current Context) - -| 属性 | 值 | -|------|-----| -| 默认分支 | `main` | -| 最新 Release | `v0.20.1` (2026-05-17) | -| 当前 Phase | Phase 1 Production Hardening ✅ 完成 | -| 下一 Phase | Phase 12 — v0.21.0 "External Capability Grafting" | -| 活跃 PR | 无(PR #55 已合并) | - -## 已知架构 Gaps(不可与 Immutable Facts 混淆) - -这些是**待实现**的能力,不是 bug: - -| Gap | 影响 | 计划版本 | 状态 | -|-----|------|----------|------| -| ~~`relations` 表零生产读取路径~~ | ~~统一实体模型的图遍历能力未暴露~~ | ~~v0.21.0~~ | **已完成** — `devkit_relation_store/query/delete` 已存在,`project_context` 已读取 | -| ~~Workflow 引擎零 MCP 暴露~~ | ~~AI 无法发现/触发工作流~~ | ~~v0.21.0~~ | **已完成** — `devkit_workflow_list/run/status` 已存在 | -| ~~`project_context` 不完整~~ | ~~缺少 relations/limits/skills/workflows~~ | ~~v0.21.0~~ | **已完成** — 已补充 `known_limits` + `skills` | -| 31/68 MCP 工具缺少调用测试 | 回归风险 | v0.21.0 | 待评估 | -| ~~`mcp/tools/repo.rs` 2376 行~~ | ~~维护负担~~ | ~~v0.21.0~~ | **已完成** — 
已拆分为 `tools/` 目录,`repo.rs` 现 730 行 | -| ~~`init_db_at` 1214 行~~ | ~~迁移函数过大~~ | ~~v0.21.0~~ | **已完成** — 已拆分为 `registry/migrate.rs`(503 行)+ `repo.rs` + `vault.rs` + `links.rs` | - -## 防失忆校验清单(每次会话启动) - -- [ ] 已读取本文件(`devbase/.claude/CLAUDE.md`) -- [ ] 已确认 `Cargo.toml` 版本与上表 F-001 一致 -- [ ] 如果 handoff 文档说"未完成",确认是新环境问题还是全局阻塞 -- [ ] 如果修改 Schema,已更新 `registry/migrate.rs` 和 `SCHEMA_DDL` - -## 快速入口 - -| 你想做什么 | 命令 | -|-----------|------| -| 运行测试 | `cargo test --all-targets` | -| 检查 clippy | `cargo clippy --all-targets -D warnings` | -| 检查格式化 | `cargo fmt --check` | -| 运行 invariant checks | `scripts/invariant-checks/run-checks.ps1` | -| 启动 MCP Server | `cargo run -- mcp` | -| 启动 TUI | `cargo run -- tui` | -| 扫描当前目录 | `devbase scan . --register` | -| 索引仓库 | `devbase index` | - -## 关键文件映射 - -| 概念 | 文件 | -|------|------| -| 架构决策 | `docs/architecture/` | -| 稳定工具文档 | `docs/reference/stable-tools/` | -| 快速开始 | `docs/guides/quickstart.md` | -| MCP 集成指南 | `docs/guides/mcp-integration.md` | -| 变更日志 | `CHANGELOG.md` | -| Agent 简报 | `AGENTS.md` | -| 贡献指南 | `CONTRIBUTING.md` | - ---- - -**Last Updated**: 2026-05-20 by Claude Opus 4.7 -**Version**: v0.20.1 +# devbase — Cognitive Anchor + +> **Purpose**: This file is designed to survive context compression. It contains +> immutable facts and current state that every AI session must know before +> working on this project. If you are reading this after a context reset, +> treat this as your primary source of truth. 
+ +--- + +## Immutable Facts(不可变事实) + +| ID | Fact | Source | Status | +|----|------|--------|--------| +| F-001 | Version | `Cargo.toml` | **v0.20.1** | +| F-002 | Edition | `Cargo.toml` | **Rust 2024** | +| F-003 | Test Coverage | CI | **494 passed, 0 failed, 5 ignored** | +| F-004 | Production Unwrap | Architecture Invariants | **0** (G5 rule enforced) | +| F-005 | MCP Tools | `src/mcp/mod.rs` | **71** (5 Stable / 62 Beta / 4 Experimental) | +| F-006 | Schema Version | `registry/migrate.rs` | **v36** | +| F-007 | Entities Table | Schema v21+ | **唯一真相源** (`repos` 表已删除) | +| F-008 | SQLite Mode | `storage.rs` | **WAL mode** | +| F-009 | Clippy | CI | **`-D warnings` 全绿** | +| F-010 | Release Assets | GitHub Releases | **Linux + Windows x64** 预编译二进制 | + +## 架构红线(Architecture Guardrails) + +- **RF-1**: 无裸 `init_db()` 调用,全部使用 `StorageBackend` 注入 +- **RF-2**: `TempStorageBackend` 用于测试隔离(禁止 `DEVBASE_DATA_DIR` 竞态) +- **RF-3**: `entities` 表是唯一真相源 +- **RF-4**: 二进制上下文 ≤ 1MB +- **RF-5**: 模块间无循环依赖 +- **RF-6**: 生产代码零 `unwrap`/`expect`/`panic`(测试除外) +- **RF-7**: 路径输出必须脱敏(`sanitize_path()` 掩码 home 目录) + +## 当前上下文(Current Context) + +| 属性 | 值 | +|------|-----| +| 默认分支 | `main` | +| 最新 Release | `v0.20.1` (2026-05-17) | +| 当前 Phase | Phase 1 Production Hardening ✅ 完成 | +| 下一 Phase | Phase 12 — v0.21.0 "External Capability Grafting" | +| 活跃 PR | 无(PR #55 已合并) | + +## 已知架构 Gaps(不可与 Immutable Facts 混淆) + +这些是**待实现**的能力,不是 bug: + +| Gap | 影响 | 计划版本 | 状态 | +|-----|------|----------|------| +| ~~`relations` 表零生产读取路径~~ | ~~统一实体模型的图遍历能力未暴露~~ | ~~v0.21.0~~ | **已完成** — `devkit_relation_store/query/delete` 已存在,`project_context` 已读取 | +| ~~Workflow 引擎零 MCP 暴露~~ | ~~AI 无法发现/触发工作流~~ | ~~v0.21.0~~ | **已完成** — `devkit_workflow_list/run/status` 已存在 | +| ~~`project_context` 不完整~~ | ~~缺少 relations/limits/skills/workflows~~ | ~~v0.21.0~~ | **已完成** — 已补充 `known_limits` + `skills` | +| 31/68 MCP 工具缺少调用测试 | 回归风险 | v0.21.0 | 待评估 | +| ~~`mcp/tools/repo.rs` 2376 行~~ | ~~维护负担~~ | ~~v0.21.0~~ | **已完成** — 
已拆分为 `tools/` 目录,`repo.rs` 现 730 行 | +| ~~`init_db_at` 1214 行~~ | ~~迁移函数过大~~ | ~~v0.21.0~~ | **已完成** — 已拆分为 `registry/migrate.rs`(503 行)+ `repo.rs` + `vault.rs` + `links.rs` | + +## 防失忆校验清单(每次会话启动) + +- [ ] 已读取本文件(`devbase/.claude/CLAUDE.md`) +- [ ] 已确认 `Cargo.toml` 版本与上表 F-001 一致 +- [ ] 如果 handoff 文档说"未完成",确认是新环境问题还是全局阻塞 +- [ ] 如果修改 Schema,已更新 `registry/migrate.rs` 和 `SCHEMA_DDL` + +## 快速入口 + +| 你想做什么 | 命令 | +|-----------|------| +| 运行测试 | `cargo test --all-targets` | +| 检查 clippy | `cargo clippy --all-targets -D warnings` | +| 检查格式化 | `cargo fmt --check` | +| 运行 invariant checks | `scripts/invariant-checks/run-checks.ps1` | +| 启动 MCP Server | `cargo run -- mcp` | +| 启动 TUI | `cargo run -- tui` | +| 扫描当前目录 | `devbase scan . --register` | +| 索引仓库 | `devbase index` | + +## 关键文件映射 + +| 概念 | 文件 | +|------|------| +| 架构决策 | `docs/architecture/` | +| 稳定工具文档 | `docs/reference/stable-tools/` | +| 快速开始 | `docs/guides/quickstart.md` | +| MCP 集成指南 | `docs/guides/mcp-integration.md` | +| 变更日志 | `CHANGELOG.md` | +| Agent 简报 | `AGENTS.md` | +| 贡献指南 | `CONTRIBUTING.md` | + +--- + +**Last Updated**: 2026-05-20 by Claude Opus 4.7 +**Version**: v0.20.1 From 29e44433671ff551986aac097a966daac5d1c96d Mon Sep 17 00:00:00 2001 From: juice094 Date: Sat, 6 Jun 2026 21:34:44 +0800 Subject: [PATCH 07/11] Fix ontology import: graceful FK handling, CLI workspace Option - Skip relations referencing non-existent entities instead of failing - Fix CLI: workspace is Option for natural default detection - Verified: 2 entities + 2 relations imported to registry Co-Authored-By: Claude Opus 4.7 --- src/main.rs | 5 ++--- src/registry/import_ontology.rs | 36 +++++++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/src/main.rs b/src/main.rs index b649946..66a118a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -305,8 +305,7 @@ pub(crate) enum Commands { /// Import ontology entities and relations from an OpenClaw workspace Ontology { /// Path to OpenClaw workspace 
(defaults to ~/.kimi_openclaw/workspace) - #[arg(default_value = "")] - workspace: String, + workspace: Option, /// Dry-run: list entities/relations without importing #[arg(long)] dry_run: bool, @@ -814,7 +813,7 @@ async fn main() -> anyhow::Result<()> { commands::skill::run_skill(&mut ctx, cmd)?; } Commands::Ontology { workspace, dry_run } => { - commands::ontology::run_import(&mut ctx, &workspace, dry_run)?; + commands::ontology::run_import(&mut ctx, workspace.as_deref().unwrap_or(""), dry_run)?; } Commands::Workflow { cmd } => { commands::workflow::run_workflow(&mut ctx, cmd)?; diff --git a/src/registry/import_ontology.rs b/src/registry/import_ontology.rs index 37102c9..e42ddb3 100644 --- a/src/registry/import_ontology.rs +++ b/src/registry/import_ontology.rs @@ -139,17 +139,41 @@ fn import_relations_file(conn: &Connection, path: &Path) -> Result<(usize, usize ) .is_ok(); - if exists { - updated += 1; - } else { - added += 1; + // Skip relations referencing non-existent entities (FK constraint) + let from_exists: bool = conn + .query_row( + "SELECT 1 FROM entities WHERE id = ?1", + rusqlite::params![from_id], + |_| Ok(true), + ) + .unwrap_or(false); + let to_exists: bool = conn + .query_row( + "SELECT 1 FROM entities WHERE id = ?1", + rusqlite::params![to_id], + |_| Ok(true), + ) + .unwrap_or(false); + if !from_exists || !to_exists { + tracing::warn!("Skipping relation {}: from or to entity not found", relation_id); + continue; } - conn.execute( + let conn_exec = conn.execute( "INSERT OR REPLACE INTO relations (id, from_entity_id, to_entity_id, relation_type, metadata, confidence, created_at) VALUES (?1, ?2, ?3, ?4, ?5, 1.0, ?6)", rusqlite::params![relation_id, from_id, to_id, rel_type, metadata, now], - )?; + ); + if let Err(e) = conn_exec { + tracing::warn!("Failed to insert relation {}: {}", relation_id, e); + continue; + } + + if exists { + updated += 1; + } else { + added += 1; + } } Ok((added, updated)) From f859f9f1e50ba2064c08e8b0bfa678dbb8f9e94a Mon Sep 
17 00:00:00 2001 From: juice094 Date: Sat, 6 Jun 2026 21:45:11 +0800 Subject: [PATCH 08/11] Cleanup: update all docs to 71 tools, track junction script, remove personal notes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - README: 70→71 MCP tools, update project structure comment - CHANGELOG: add Phase 2 (multi-vault) + Phase 3 (ontology import) - AGENTS: Schema 36, 71 tools, 495 tests, add Ontology Import+Skill Sync - Track scripts/Sync-WorkspaceJunctions.ps1 (Obsidian↔workspace junctions) - Remove personal development notes (moved to recycle bin) Co-Authored-By: Claude Opus 4.7 --- AGENTS.md | 516 +++++----- CHANGELOG.md | 1386 ++++++++++++++------------- README.md | 292 +++--- scripts/Sync-WorkspaceJunctions.ps1 | 66 ++ 4 files changed, 1164 insertions(+), 1096 deletions(-) create mode 100644 scripts/Sync-WorkspaceJunctions.ps1 diff --git a/AGENTS.md b/AGENTS.md index 71577d7..613efdf 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,258 +1,258 @@ -# Agent 环境指引 - -`devbase` 是 **本地情境编译器(Local Context Compiler)** —— AI agent 在本地数字世界中的海马体。 - -> 它将本地数字资产的原始数据(代码库、笔记、Skill、工作流)编译为 AI 可决策的结构化情境,不负责思考,不负责执行,只负责感知、编码、持久化、检索。 - -- **当前阶段**:阶段十一 — v0.20.0 已发布(知识完备性) -- **当前版本**:v0.20.1(Schema 36,70 MCP tools,494 tests) -- **已完成里程碑**:Registry God Object 完全拆解(10 子模块提取)+ 18 workspace crates 提取 + MCP Python SDK 1.16.0 兼容修复 + repo.rs trait 化 + flaky 测试根治(RF-2.1/2.2/2.3)+ 许可证迁移 + health 性能优化(-44%)+ index skip-embeddings + batch encoding 实验 + RF-6 清零 + 架构治理文档(ADR/不变量清单)+ Tantivy BM25 代码符号搜索(P1)+ AppContext 职责拆分 Phase 1/2(storage.rs 860→430 行)+ 架构不变量 CI(G5/T11/T12)+ Embedding 多后端(Candle/Ollama 配置切换, P3)+ EnvVersionCache 扩展(9 工具链检测, P4)+ **v0.16.0 Agent Contexts(P1/P2/P3)**:`agent_contexts`/`agent_memories`/`context_entity_links` Schema + 9 个 Session MCP tools + Context-aware Skill Runtime(`DEVBASE_ACTIVE_CONTEXT` 注入)+ **v0.16.1 Workflow-Session Binding**:`workflow_executions.context_id` + 执行自动绑定 Active Context + **v0.17.0 Embedding 
Externalization**:`embedding` 从 default features 移除(Candle/Ollama 降级为 opt-in `llm-backend`)+ Schema 34 向量存储 + `cosine_similarity` SQLite UDF + `devkit_session_recall` / `devkit_session_index`(60 tools)+ **v0.18.0 ClaudeCode Integration**:`devkit_project_brief`(Markdown 项目简报)+ `devkit_impact_analysis`(修改影响范围分析)+ `devkit_session_export` / `devkit_session_import` + `scripts/devbase-claude.ps1` 启动器(自动注入 `.claude/CLAUDE.md`)+ RFC `docs/RFC/claudecode-workflow-integration.md`(64 tools)+ **v0.18.0 发布收尾**:PR 合并 + 双平台二进制构建 + GitHub Release + 根目录治理 + 世界模型战略认知沉淀(Vault + AGENTS 双向联动)+ NotebookLM 生态消化(5 项目注册)+ GreptimeDB 互补分析 + **v0.19.0 知识基础设施硬化**:SQLite WAL 默认启用 + `devkit_index_health`(Beta)+ Vault 导出(`devkit_vault_export`)+ Redis ADR 决策(放弃引入)+ **v0.20.0 知识完备性**:Vault 双向链接 BFS 图遍历(`devkit_vault_graph` 扩展)+ Vault Git-based 历史追踪(`devkit_vault_history`,第 67 个 tool)+ 混合检索质量监控(`devkit_search_quality`,第 68 个 tool,`HybridSearchMetrics`)+ Block 引用支持(`WikiLink.anchor`:`[[note#heading]]` / `[[note#^block-id]]`)+ 性能回归基线(`#[ignore]` 1k/10k 阈值测试)+ 客户端无关原则(Client-Agnostic Principle)落地 + `skill sync` 泛化接口(零硬编码客户端路径) -- **核心方向**:让 Kimi CLI 在调用文件工具之前,先通过 devbase 获得"该读哪些文件、为什么读、它们之间的关系" -- **本质分析**:见 `vault/99-Meta/devbase-essence-analysis-20260430.md` 与 `docs/architecture/redefinition.md` -- **设计文档**: - - [`docs/architecture/workflow-dsl.md`](docs/architecture/workflow-dsl.md) — Workflow DSL 规范 - - [`docs/architecture/workspace-as-schema.md`](docs/architecture/workspace-as-schema.md) — 统一实体模型设计 - - [`docs/RFC/agent-memory-vector-storage.md`](docs/RFC/agent-memory-vector-storage.md) — v0.17.0 Agent Memory 向量存储 RFC(Embedding 职责外迁设计) - - [`docs/guides/mcp-integration-guide.md`](docs/guides/mcp-integration-guide.md) — MCP 集成指南 - - [`docs/README.md`](docs/README.md) — 完整文档导航 - -Skill Runtime 全生命周期已落地(含依赖管理 Schema v15),Schema v16 统一实体模型(entities/relations)已落地,Skill 自动封装(`discover`)已落地。 - -- **技术栈**:Rust 2024, SQLite, tokio, ratatui, git2, reqwest, tantivy -- **Registry 
DB**:`%LOCALAPPDATA%\devbase\registry.db`(轻量索引,用户本地,永不进入版本控制) -- **Workspace**:`%LOCALAPPDATA%\devbase\workspace/` —— 文件系统 = source of truth - - `vault/` —— PARA 结构:00-Inbox, 01-Projects, 02-Areas, 03-Resources, 04-Archives, 99-Meta - - `assets/` —— 二进制资源 -- **MCP Server**:stdio only,**70 个 tools**(含 7 个 vault tools + 8 个代码分析工具 + 5 个 embedding/搜索工具 + 5 个 Skill Runtime tools + 3 个 Workflow/评分 tools + 1 个报告工具 + 1 个 arXiv 工具 + 2 个 KnownLimit tools + 3 个 Relation tools + 11 个 Agent Context tools + 2 个 ClaudeCode 集成工具 + 1 个 streaming index 工具 + 1 个 oplog 工具 + 1 个 Index Health 工具 + 1 个 Search Quality 工具 + 1 个 Evaluate 工具 + 1 个 DocumentConvert 工具);配置见 `mcp.json` -- **Kimi CLI 集成**:MCP server 已通过 `kimi mcp add` 注册,端到端验证通过(`kimi --print` 成功调用 `devkit_health`);项目级 skill 位于 `.kimi/skills/devbase-project/SKILL.md` -- **统一节点模型**:`core::node::{Node, NodeType, Edge}` —— GitRepo / VaultNote / Asset / ExternalLink -- **当前测试**:476 lib passed / 0 failed / 5 ignored + 7/7 integration passed + 11/11 workflow passed(共 494) -- **编译状态**:0 warning / 0 vulnerabilities(`cargo audit` 干净,除上游 `tokei` 的 `RUSTSEC-2020-0163`) -- **Workspace 结构**:`crates/` 目录已启用,19 个零耦合模块已提取为独立 crate(`devbase-symbol-links`, `devbase-sync-protocol`, `devbase-core-types`, `devbase-syncthing-client`, `devbase-vault-frontmatter`, `devbase-vault-wikilink`, `devbase-workflow-interpolate`, `devbase-workflow-model`, `devbase-registry-health`, `devbase-registry-metrics`, `devbase-registry-workspace`, `devbase-embedding`, `devbase-skill-runtime-types`, `devbase-skill-runtime-parser`, `devbase-registry-entity`, `devbase-registry-relation`, `devbase-registry-call-graph`, `devbase-registry-dead-code`, `devbase-registry-code-symbols`) -- **Workflow Engine**:YAML 解析 + 拓扑调度 + batch 并行执行 + 5 种 step 类型(skill/subworkflow/parallel/condition/loop) -- **NLQ 自然语言查询**:TUI `[:]` 触发 embedding 语义搜索,fallback 降级文本搜索 -- **Mind Market 评分**:success_rate / usage_count / rating(0-5),`skill recalc-scores/top/recommend` - -## 关键约定 - -1. 
**文件操作**:读取用 `ReadFile`,搜索用 `Grep`/`Glob`,修改用 `StrReplaceFile`,整文件重写用 `WriteFile` -2. **Shell**:Windows PowerShell;用 `;` 分隔命令 -3. **Git**:提交前必须通过 `cargo test --all-targets` + `cargo clippy --all-targets -D warnings` + `cargo fmt --check` -4. **Schema 迁移**:`PRAGMA user_version` 安全升级;升级前自动调用 `backup::auto_backup_before_migration()` - -## 安全原则 - -### 本地优先(Local-First) - -- **Registry DB** 始终存储在用户的本地配置目录(`dirs::config_dir()/devbase/`),绝不向远程传输 -- **代码内容** 不会被上传到任何云端服务(除非用户显式配置 GitHub token 用于 stars 查询) -- **MCP Server** 仅通过 stdio 本地进程通信,不暴露网络端口 - -### 客户端无关(Client-Agnostic) - -> devbase 的核心能力(编排、注册、索引、搜索、同步)必须在不依赖任何特定 AI 客户端的前提下独立运行。 - -- ✅ **允许**:向通用目录输出数据,由用户自行分发给任意客户端(如 `skill sync --output-dir ./plans`) -- ✅ **允许**:实现标准协议(MCP)供任意客户端连接 -- ❌ **禁止**:核心能力硬编码特定客户端的路径、API、或配置格式(如 `C:\Users\xxx\.claude`) -- ❌ **禁止**:核心能力的可用性取决于某个客户端是否安装 -- 🟡 **适配层**:`scripts/claude/`、`docs/clients/` 等目录下的客户端适配脚本属于配套示例,不归入核心版本控制 - -### 凭证管理 - -- GitHub token、LLM API key 存储在本地 `config.toml` 中 -- `config.toml` 位于用户配置目录,**不在项目工作目录**,因此不会被意外 `git commit` -- 默认配置模板中的 token 字段使用占位符 ``,避免真实 token 格式泄露 -- `.gitignore` 已覆盖 `*.db`、`.devbase/`、`.env*`、`*.local.toml` - -### 审计与备份 - -- 所有 `scan`/`sync`/`health` 操作自动写入 OpLog(SQLite `oplog` 表) -- Schema 迁移前自动生成 `backup-YYYYMMDD-HHMMSS.db` 快照 -- Registry 支持 `export`/`import` 用于用户自主备份 - -## 许可证策略 - -- **主许可证**: AGPL-3.0-or-later (`LICENSE`) -- **商业授权**: 双许可模式,闭源/专有 SaaS 使用需联系作者 (`LICENSE-COMMERCIAL.md`) -- **Cargo.toml**: `license = "AGPL-3.0-or-later"` -- **SPDX 头**: 新增源文件应在顶部包含 AGPL-3.0 声明(见 `LICENSE` 末尾 "How to Apply" 部分) - -## 架构状态(Wave 15b 完成) - -| 维度 | 状态 | -|------|------| -| 代码质量 | `rustfmt.toml` + `cargo fmt` + `clippy -D warnings` 全绿 | -| 模块拆分 | `sync`→5 / `registry`→11 / `mcp` 测试分离 / `search`→hybrid / `oplog_analytics` / `symbol_links` / **workspace: 3 crates extracted** | -| 库/二进制 | `src/lib.rs` 导出全部 **30+** 个模块;`src/main.rs` 仅 CLI 入口 | -| TUI 架构 | `render/` 6 子模块 + `theme.rs` Design Token + `layout.rs` 响应式引擎 | -| 数据层 | Schema v23: 
`repos`/`vault_notes`/`papers`/`workflows`/`repo_modules_legacy` 表已删除;`entities` 为唯一数据源;`repo_tags/repo_remotes/repo_health/...` 为独立 JOIN 表(无 FK);仅 `skills` 保留独立表(embedding BLOB) | -| CI/CD | `.github/workflows/ci.yml`:check / test / fmt / clippy on Windows | -| 依赖安全 | `cargo audit` 0 漏洞(除上游 `tokei` 的 `RUSTSEC-2020-0163`) | - -## 架构红线(Architecture Guardrails) - -> 基于第一性原理的工程约束。违反任意一条 = HALT,转交人类裁决或回滚。 -> 规则编号 `RF-XX`(Red-line / Fitness function),带客观测量标准,非主观描述。 - -### RF-1: 依赖注入优于全局状态(Global State Anti-Pattern) - -**理论锚定**:全局可变状态使组件隐式耦合,破坏可测试性与可复用性(参考:Pure Function / DI 原则)。 - -**规则**: -- 禁止新增 `dirs::data_local_dir()` / `std::env::var_os` 硬编码路径。 -- 所有 IO 边界路径(DB、索引、备份、配置)必须通过参数、构造函数或 `trait` 注入。 -- **例外(Grandfathered)**:现有 3 处(`backup_dir`、`db_path`、`index_path`)在重构前不得新增第 4 处。 - -**Fitness Function**: -```bash -# 新增 PR 中不得出现新的全局路径硬编码 -grep -rn "dirs::data_local_dir\|std::env::var_os\|std::env::var(\"LOCALAPPDATA\"" src/ \ - | grep -v "backup.rs\|migrate.rs\|search.rs" -# 预期输出:空 -``` - -### RF-2: 测试密封性(Hermetic Testing) - -**理论锚定**:测试失败必须仅因被测代码缺陷,不因外部因素、测试顺序或并行调度(参考:Google Test Blog — Hermetic Servers)。 - -**规则**: -- 所有测试禁止修改全局进程状态(`std::env::set_var`、`static mut`、全局文件系统句柄)。 -- 文件系统测试必须使用 `tempfile` + 注入式路径,禁止直接操作 `%LOCALAPPDATA%` 或 `~/.config`。 -- Tantivy / SQLite 文件系统测试必须获取 `SEARCH_TEST_LOCK`(或同等级串行化机制)。 - -**子规则(来自 PR #4 教训)**: -- **R2.1 禁止 `DEVBASE_DATA_DIR` 全局注入**:并行测试中 `std::env::set_var("DEVBASE_DATA_DIR", ...)` 导致竞态;必须使用 `TempStorageBackend` 注入式替代。 -- **R2.2 Windows 路径双端规范化**:`TempDir` 可能返回短文件名(`TEMP~1`),而 `dunce::canonicalize` 返回长文件名;路径比较前必须对**双方**调用 `dunce::canonicalize`。 -- **R2.3 `git2` 测试显式身份 + 显式分支**: - - CI runner 无全局 `user.name`/`user.email` → `repo.signature()` 会 panic;必须改用 `git2::Signature::now("Test", "test@example.com")`。 - - `git2::Repository::init` 的默认分支在不同平台可能为 `master` 或 `main`;必须显式 `repo.set_head("refs/heads/main")` 并 commit 到 `"refs/heads/main"`。 - -**Fitness Function**: -```bash -# 高并发下 100% 通过,无 flaky -cargo test --test-threads=16 -``` - 
-### RF-3: Schema 单一事实来源(Single Source of Truth) - -**理论锚定**:重复信息必然 drift(参考:DRY 原则 + Evolutionary Architecture 的版本一致性约束)。 - -**规则**: -- `SCHEMA_DDL`(`registry/test_helpers.rs`)与 `migrate.rs` 必须原子同步。 -- 新增表、索引、列必须同时出现在两者中;禁止仅更新其一。 - -**Fitness Function**: -- CI 运行 `test_in_memory_schema_version` + schema 结构比对脚本(可手动运行 `cargo test registry::test_helpers::tests` 验证)。 - -### RF-4: 二进制入口限界(Bounded Context) - -**理论锚定**:CLI 入口应仅做命令分发,业务逻辑应在 lib 模块中(参考:Hexagonal Architecture / Ports & Adapters)。 - -**规则**: -- `main.rs` 行数不得超过 **1000 行**。 -- 新增 CLI 命令必须先拆分为 `commands/` 子模块或独立函数,禁止在 `main.rs` 中堆积业务逻辑。 - -**Fitness Function**: -```bash -# 当前 515 行(Phase 1/2/3 已削减 1003 行),远超目标 -[ $(wc -l < src/main.rs) -le 1000 ] || exit 1 -``` - -### RF-5: 无循环依赖(Acyclic Dependencies) - -**理论锚定**:循环依赖破坏模块化,使增量编译和独立复用不可能(参考:John Lakos — Large-Scale C++ Software Design)。 - -**规则**: -- 禁止模块间双向 `use crate::` 引用。 -- 新增模块必须通过脚本验证无循环(当前已满足,未来 PR 保持)。 - -**Fitness Function**: -```bash -# 文件级双向依赖检测(当前输出应为空) -for f in src/**/*.rs; do - name=$(basename "$f" .rs) - refs=$(grep -o 'use crate::[a-z_]*' "$f" | sed 's/use crate:://') - for r in $refs; do - if [ -f "src/$r.rs" ] && grep -q "use crate::$name\b" "src/$r.rs"; then - echo "CYCLE: $name <-> $r" - fi - done -done -``` - -### RF-7: Workspace 拆分约束(Module Distribution Readiness) - -**理论锚定**:模块能否独立发布是耦合健康度的金标准;不能拆分的模块 = 耦合不健康的模块。 - -**规则**: -- 新增模块若对 devbase 内部其他模块的 `crate::` 引用超过 **5 个**,禁止提取为 workspace crate。 -- 已提取 crate 的重新导出文件(`src/symbol_links.rs` 等)**禁止添加新代码**——顶部有 `RE-EXPORT ONLY` 注释作为守卫。 -- 子 crate 的依赖版本必须与 workspace 统一,禁止独立 bump。 - -**Fitness Function**: -```bash -# 扫描所有 src/*.rs,统计 crate:: 引用数 -for f in src/*.rs; do - count=$(grep -c 'crate::' "$f") - if [ "$count" -gt 15 ]; then - echo "HIGH COUPLING: $f ($count refs)" - fi -done -# 预期输出:空(或仅已标记的高耦合文件如 mcp/tools/repo.rs) -``` - -### RF-6: 生产代码无 panic(Crash-only Software) - -**理论锚定**:Rust 的 `Result` 类型将错误显式化;`unwrap` 是将运行时崩溃隐藏在类型系统背后(参考:Joe Armstrong — Let it crash,但 Rust 中崩溃 = 进程终止,不可接受)。 - 
-**规则**: -- 生产代码(`src/**/*.rs` 中不在 `#[cfg(test)]` 块内的代码)禁止 `unwrap()`、`expect()`、`panic!()`。 -- 测试代码不受此限,但鼓励使用 `?` 传播。 - -**Fitness Function**: -```bash -# 生产代码 unwrap 计数(排除 #[cfg(test)] 块及 tests.rs 文件) -for f in $(find src -name "*.rs"); do - test_line=$(grep -n "#\[cfg(test)\]" "$f" | head -1 | cut -d: -f1) - if echo "$f" | grep -qE "tests?\.rs$|_test\.rs$|/tests/"; then continue; fi - if [ -n "$test_line" ]; then - head -n "$((test_line - 1))" "$f" | grep -n "\.unwrap()" - else - grep -n "\.unwrap()" "$f" - fi -done -# 预期输出:空 -``` - -**状态**:🟢 **已完成**(v0.20.1 复核:生产代码 unwrap = 0;此前 1090 为测试模块误统计)。 - -### 架构治理框架(Architecture Governance) - -> 参考:外部架构治理方法论(Kimi 会话 `e9f2965f-b949-46a5-9d7c-afd6d4d9232c`) - -**已制度化实践**: - -| 实践 | devbase 落地形式 | 文档位置 | -|------|-----------------|---------| -| ADR(架构决策记录) | ADR-001(单 crate defer)、ADR-002(batch encoding 回滚) | [`docs/architecture/adr-template.md`](docs/architecture/adr-template.md) | -| 不变量清单(Invariants) | RF-1~RF-7 + 分层模块约束(T01–T12) | [`docs/architecture/invariants.md`](docs/architecture/invariants.md) | -| 模块提取演习 | RF-7 的 5 个 `crate::` 引用阈值 + 已提取 18 workspace crates | 本文件 §RF-7 | -| 三层摘要 | `crates/*/README.md` 要求:一句话 + 一页纸 + 深度链接 | 各 crate README | -| 定期架构回顾 | 每次 Wave 结束时的架构审计(见 `docs/_audit/`) | `docs/_audit/2026-04-26-*.md` | - -**待增强**: -- 三层摘要:部分已提取 crate 的 README 尚未达到"一页纸"标准 -- 定期架构回顾:当前按 Wave(功能迭代周期)触发,建议每 2–4 周增加一次纯架构 review(不看 feature 进度,只看不变量违反和隐式依赖) - ---- - -## 禁止事项 - -- 不得修改 `dev\third_party\*` 外部仓库 -- 不得在没有迁移逻辑的情况下修改 registry schema -- 不得引入已 deprecated 的协议 -- **不得在主仓库引入 Spark/Flink 依赖**(研究性质代码必须置于独立仓库,保持主仓库轻量) -- **不得在任何源码文件中硬编码真实 token、api_key 或密码**(包括注释和测试数据) - -> 完整版(含历史记录、路线图、详细讨论):见 docs/AGENTS-full.md +# Agent 环境指引 + +`devbase` 是 **本地情境编译器(Local Context Compiler)** —— AI agent 在本地数字世界中的海马体。 + +> 它将本地数字资产的原始数据(代码库、笔记、Skill、工作流)编译为 AI 可决策的结构化情境,不负责思考,不负责执行,只负责感知、编码、持久化、检索。 + +- **当前阶段**:阶段十一 — v0.20.0 已发布(知识完备性) +- **当前版本**:v0.20.1(Schema 36,71 MCP tools,495 tests) +- **已完成里程碑**:Registry God Object 
完全拆解(10 子模块提取)+ 18 workspace crates 提取 + MCP Python SDK 1.16.0 兼容修复 + repo.rs trait 化 + flaky 测试根治(RF-2.1/2.2/2.3)+ 许可证迁移 + health 性能优化(-44%)+ index skip-embeddings + batch encoding 实验 + RF-6 清零 + 架构治理文档(ADR/不变量清单)+ Tantivy BM25 代码符号搜索(P1)+ AppContext 职责拆分 Phase 1/2(storage.rs 860→430 行)+ 架构不变量 CI(G5/T11/T12)+ Embedding 多后端(Candle/Ollama 配置切换, P3)+ EnvVersionCache 扩展(9 工具链检测, P4)+ **v0.16.0 Agent Contexts(P1/P2/P3)**:`agent_contexts`/`agent_memories`/`context_entity_links` Schema + 9 个 Session MCP tools + Context-aware Skill Runtime(`DEVBASE_ACTIVE_CONTEXT` 注入)+ **v0.16.1 Workflow-Session Binding**:`workflow_executions.context_id` + 执行自动绑定 Active Context + **v0.17.0 Embedding Externalization**:`embedding` 从 default features 移除(Candle/Ollama 降级为 opt-in `llm-backend`)+ Schema 34 向量存储 + `cosine_similarity` SQLite UDF + `devkit_session_recall` / `devkit_session_index`(60 tools)+ **v0.18.0 ClaudeCode Integration**:`devkit_project_brief`(Markdown 项目简报)+ `devkit_impact_analysis`(修改影响范围分析)+ `devkit_session_export` / `devkit_session_import` + `scripts/devbase-claude.ps1` 启动器(自动注入 `.claude/CLAUDE.md`)+ RFC `docs/RFC/claudecode-workflow-integration.md`(64 tools)+ **v0.18.0 发布收尾**:PR 合并 + 双平台二进制构建 + GitHub Release + 根目录治理 + 世界模型战略认知沉淀(Vault + AGENTS 双向联动)+ NotebookLM 生态消化(5 项目注册)+ GreptimeDB 互补分析 + **v0.19.0 知识基础设施硬化**:SQLite WAL 默认启用 + `devkit_index_health`(Beta)+ Vault 导出(`devkit_vault_export`)+ Redis ADR 决策(放弃引入)+ **v0.20.0 知识完备性**:Vault 双向链接 BFS 图遍历(`devkit_vault_graph` 扩展)+ Vault Git-based 历史追踪(`devkit_vault_history`,第 67 个 tool)+ 混合检索质量监控(`devkit_search_quality`,第 68 个 tool,`HybridSearchMetrics`)+ Block 引用支持(`WikiLink.anchor`:`[[note#heading]]` / `[[note#^block-id]]`)+ 性能回归基线(`#[ignore]` 1k/10k 阈值测试)+ 客户端无关原则(Client-Agnostic Principle)落地 + `skill sync` 泛化接口(零硬编码客户端路径) +- **核心方向**:让 Kimi CLI 在调用文件工具之前,先通过 devbase 获得"该读哪些文件、为什么读、它们之间的关系" +- **本质分析**:见 `vault/99-Meta/devbase-essence-analysis-20260430.md` 与 `docs/architecture/redefinition.md` +- **设计文档**: + - 
[`docs/architecture/workflow-dsl.md`](docs/architecture/workflow-dsl.md) — Workflow DSL 规范 + - [`docs/architecture/workspace-as-schema.md`](docs/architecture/workspace-as-schema.md) — 统一实体模型设计 + - [`docs/RFC/agent-memory-vector-storage.md`](docs/RFC/agent-memory-vector-storage.md) — v0.17.0 Agent Memory 向量存储 RFC(Embedding 职责外迁设计) + - [`docs/guides/mcp-integration-guide.md`](docs/guides/mcp-integration-guide.md) — MCP 集成指南 + - [`docs/README.md`](docs/README.md) — 完整文档导航 + +Skill Runtime 全生命周期已落地(含依赖管理 Schema v15),Schema v16 统一实体模型(entities/relations)已落地,Skill 自动封装(`discover`)已落地。 + +- **技术栈**:Rust 2024, SQLite, tokio, ratatui, git2, reqwest, tantivy +- **Registry DB**:`%LOCALAPPDATA%\devbase\registry.db`(轻量索引,用户本地,永不进入版本控制) +- **Workspace**:`%LOCALAPPDATA%\devbase\workspace/` —— 文件系统 = source of truth + - `vault/` —— PARA 结构:00-Inbox, 01-Projects, 02-Areas, 03-Resources, 04-Archives, 99-Meta + - `assets/` —— 二进制资源 +- **MCP Server**:stdio only,**71 个 tools**(含 7 个 vault tools + 8 个代码分析工具 + 5 个 embedding/搜索工具 + 5 个 Skill Runtime tools + 3 个 Workflow/评分 tools + 1 个报告工具 + 1 个 arXiv 工具 + 2 个 KnownLimit tools + 3 个 Relation tools + 11 个 Agent Context tools + 2 个 ClaudeCode 集成工具 + 1 个 streaming index 工具 + 1 个 oplog 工具 + 1 个 Index Health 工具 + 1 个 Search Quality 工具 + 1 个 Evaluate 工具 + 1 个 DocumentConvert 工具 + 1 个 Ontology Import 工具 + 1 个 Skill Sync 工具);配置见 `mcp.json` +- **Kimi CLI 集成**:MCP server 已通过 `kimi mcp add` 注册,端到端验证通过(`kimi --print` 成功调用 `devkit_health`);项目级 skill 位于 `.kimi/skills/devbase-project/SKILL.md` +- **统一节点模型**:`core::node::{Node, NodeType, Edge}` —— GitRepo / VaultNote / Asset / ExternalLink +- **当前测试**:476 lib passed / 0 failed / 5 ignored + 7/7 integration passed + 11/11 workflow passed(共 494) +- **编译状态**:0 warning / 0 vulnerabilities(`cargo audit` 干净,除上游 `tokei` 的 `RUSTSEC-2020-0163`) +- **Workspace 结构**:`crates/` 目录已启用,19 个零耦合模块已提取为独立 crate(`devbase-symbol-links`, `devbase-sync-protocol`, `devbase-core-types`, `devbase-syncthing-client`, 
`devbase-vault-frontmatter`, `devbase-vault-wikilink`, `devbase-workflow-interpolate`, `devbase-workflow-model`, `devbase-registry-health`, `devbase-registry-metrics`, `devbase-registry-workspace`, `devbase-embedding`, `devbase-skill-runtime-types`, `devbase-skill-runtime-parser`, `devbase-registry-entity`, `devbase-registry-relation`, `devbase-registry-call-graph`, `devbase-registry-dead-code`, `devbase-registry-code-symbols`) +- **Workflow Engine**:YAML 解析 + 拓扑调度 + batch 并行执行 + 5 种 step 类型(skill/subworkflow/parallel/condition/loop) +- **NLQ 自然语言查询**:TUI `[:]` 触发 embedding 语义搜索,fallback 降级文本搜索 +- **Mind Market 评分**:success_rate / usage_count / rating(0-5),`skill recalc-scores/top/recommend` + +## 关键约定 + +1. **文件操作**:读取用 `ReadFile`,搜索用 `Grep`/`Glob`,修改用 `StrReplaceFile`,整文件重写用 `WriteFile` +2. **Shell**:Windows PowerShell;用 `;` 分隔命令 +3. **Git**:提交前必须通过 `cargo test --all-targets` + `cargo clippy --all-targets -- -D warnings` + `cargo fmt --check` +4. **Schema 迁移**:`PRAGMA user_version` 安全升级;升级前自动调用 `backup::auto_backup_before_migration()` + +## 安全原则 + +### 本地优先(Local-First) + +- **Registry DB** 始终存储在用户的本地配置目录(`dirs::config_dir()/devbase/`),绝不向远程传输 +- **代码内容** 不会被上传到任何云端服务(除非用户显式配置 GitHub token 用于 stars 查询) +- **MCP Server** 仅通过 stdio 本地进程通信,不暴露网络端口 + +### 客户端无关(Client-Agnostic) + +> devbase 的核心能力(编排、注册、索引、搜索、同步)必须在不依赖任何特定 AI 客户端的前提下独立运行。 + +- ✅ **允许**:向通用目录输出数据,由用户自行分发给任意客户端(如 `skill sync --output-dir ./plans`) +- ✅ **允许**:实现标准协议(MCP)供任意客户端连接 +- ❌ **禁止**:核心能力硬编码特定客户端的路径、API、或配置格式(如 `C:\Users\xxx\.claude`) +- ❌ **禁止**:核心能力的可用性取决于某个客户端是否安装 +- 🟡 **适配层**:`scripts/claude/`、`docs/clients/` 等目录下的客户端适配脚本属于配套示例,不归入核心版本控制 + +### 凭证管理 + +- GitHub token、LLM API key 存储在本地 `config.toml` 中 +- `config.toml` 位于用户配置目录,**不在项目工作目录**,因此不会被意外 `git commit` +- 默认配置模板中的 token 字段使用占位符 ``,避免真实 token 格式泄露 +- `.gitignore` 已覆盖 `*.db`、`.devbase/`、`.env*`、`*.local.toml` + +### 审计与备份 + +- 所有 `scan`/`sync`/`health` 操作自动写入 OpLog(SQLite `oplog` 表) +- Schema 迁移前自动生成 `backup-YYYYMMDD-HHMMSS.db` 快照 +- 
Registry 支持 `export`/`import` 用于用户自主备份 + +## 许可证策略 + +- **主许可证**: AGPL-3.0-or-later (`LICENSE`) +- **商业授权**: 双许可模式,闭源/专有 SaaS 使用需联系作者 (`LICENSE-COMMERCIAL.md`) +- **Cargo.toml**: `license = "AGPL-3.0-or-later"` +- **SPDX 头**: 新增源文件应在顶部包含 AGPL-3.0 声明(见 `LICENSE` 末尾 "How to Apply" 部分) + +## 架构状态(Wave 15b 完成) + +| 维度 | 状态 | +|------|------| +| 代码质量 | `rustfmt.toml` + `cargo fmt` + `clippy -D warnings` 全绿 | +| 模块拆分 | `sync`→5 / `registry`→11 / `mcp` 测试分离 / `search`→hybrid / `oplog_analytics` / `symbol_links` / **workspace: 3 crates extracted** | +| 库/二进制 | `src/lib.rs` 导出全部 **30+** 个模块;`src/main.rs` 仅 CLI 入口 | +| TUI 架构 | `render/` 6 子模块 + `theme.rs` Design Token + `layout.rs` 响应式引擎 | +| 数据层 | Schema v23: `repos`/`vault_notes`/`papers`/`workflows`/`repo_modules_legacy` 表已删除;`entities` 为唯一数据源;`repo_tags/repo_remotes/repo_health/...` 为独立 JOIN 表(无 FK);仅 `skills` 保留独立表(embedding BLOB) | +| CI/CD | `.github/workflows/ci.yml`:check / test / fmt / clippy on Windows | +| 依赖安全 | `cargo audit` 0 漏洞(除上游 `tokei` 的 `RUSTSEC-2020-0163`) | + +## 架构红线(Architecture Guardrails) + +> 基于第一性原理的工程约束。违反任意一条 = HALT,转交人类裁决或回滚。 +> 规则编号 `RF-XX`(Red-line / Fitness function),带客观测量标准,非主观描述。 + +### RF-1: 依赖注入优于全局状态(Global State Anti-Pattern) + +**理论锚定**:全局可变状态使组件隐式耦合,破坏可测试性与可复用性(参考:Pure Function / DI 原则)。 + +**规则**: +- 禁止新增 `dirs::data_local_dir()` / `std::env::var_os` 硬编码路径。 +- 所有 IO 边界路径(DB、索引、备份、配置)必须通过参数、构造函数或 `trait` 注入。 +- **例外(Grandfathered)**:现有 3 处(`backup_dir`、`db_path`、`index_path`)在重构前不得新增第 4 处。 + +**Fitness Function**: +```bash +# 新增 PR 中不得出现新的全局路径硬编码 +grep -rn "dirs::data_local_dir\|std::env::var_os\|std::env::var(\"LOCALAPPDATA\"" src/ \ + | grep -v "backup.rs\|migrate.rs\|search.rs" +# 预期输出:空 +``` + +### RF-2: 测试密封性(Hermetic Testing) + +**理论锚定**:测试失败必须仅因被测代码缺陷,不因外部因素、测试顺序或并行调度(参考:Google Test Blog — Hermetic Servers)。 + +**规则**: +- 所有测试禁止修改全局进程状态(`std::env::set_var`、`static mut`、全局文件系统句柄)。 +- 文件系统测试必须使用 `tempfile` + 注入式路径,禁止直接操作 `%LOCALAPPDATA%` 或 `~/.config`。 +- Tantivy / SQLite 
文件系统测试必须获取 `SEARCH_TEST_LOCK`(或同等级串行化机制)。 + +**子规则(来自 PR #4 教训)**: +- **R2.1 禁止 `DEVBASE_DATA_DIR` 全局注入**:并行测试中 `std::env::set_var("DEVBASE_DATA_DIR", ...)` 导致竞态;必须使用 `TempStorageBackend` 注入式替代。 +- **R2.2 Windows 路径双端规范化**:`TempDir` 可能返回短文件名(`TEMP~1`),而 `dunce::canonicalize` 返回长文件名;路径比较前必须对**双方**调用 `dunce::canonicalize`。 +- **R2.3 `git2` 测试显式身份 + 显式分支**: + - CI runner 无全局 `user.name`/`user.email` → `repo.signature()` 会 panic;必须改用 `git2::Signature::now("Test", "test@example.com")`。 + - `git2::Repository::init` 的默认分支在不同平台可能为 `master` 或 `main`;必须显式 `repo.set_head("refs/heads/main")` 并 commit 到 `"refs/heads/main"`。 + +**Fitness Function**: +```bash +# 高并发下 100% 通过,无 flaky +cargo test -- --test-threads=16 +``` + +### RF-3: Schema 单一事实来源(Single Source of Truth) + +**理论锚定**:重复信息必然 drift(参考:DRY 原则 + Evolutionary Architecture 的版本一致性约束)。 + +**规则**: +- `SCHEMA_DDL`(`registry/test_helpers.rs`)与 `migrate.rs` 必须原子同步。 +- 新增表、索引、列必须同时出现在两者中;禁止仅更新其一。 + +**Fitness Function**: +- CI 运行 `test_in_memory_schema_version` + schema 结构比对脚本(可手动运行 `cargo test registry::test_helpers::tests` 验证)。 + +### RF-4: 二进制入口限界(Bounded Context) + +**理论锚定**:CLI 入口应仅做命令分发,业务逻辑应在 lib 模块中(参考:Hexagonal Architecture / Ports & Adapters)。 + +**规则**: +- `main.rs` 行数不得超过 **1000 行**。 +- 新增 CLI 命令必须先拆分为 `commands/` 子模块或独立函数,禁止在 `main.rs` 中堆积业务逻辑。 + +**Fitness Function**: +```bash +# 当前 515 行(Phase 1/2/3 已削减 1003 行),远低于 1000 行上限 +[ $(wc -l < src/main.rs) -le 1000 ] || exit 1 +``` + +### RF-5: 无循环依赖(Acyclic Dependencies) + +**理论锚定**:循环依赖破坏模块化,使增量编译和独立复用不可能(参考:John Lakos — Large-Scale C++ Software Design)。 + +**规则**: +- 禁止模块间双向 `use crate::` 引用。 +- 新增模块必须通过脚本验证无循环(当前已满足,未来 PR 保持)。 + +**Fitness Function**: +```bash +# 文件级双向依赖检测(当前输出应为空) +for f in src/**/*.rs; do + name=$(basename "$f" .rs) + refs=$(grep -o 'use crate::[a-z_]*' "$f" | sed 's/use crate:://') + for r in $refs; do + if [ -f "src/$r.rs" ] && grep -q "use crate::$name\b" "src/$r.rs"; then + echo "CYCLE: $name <-> $r" + fi + done +done +``` + +### RF-7: Workspace 
拆分约束(Module Distribution Readiness) + +**理论锚定**:模块能否独立发布是耦合健康度的金标准;不能拆分的模块 = 耦合不健康的模块。 + +**规则**: +- 新增模块若对 devbase 内部其他模块的 `crate::` 引用超过 **5 个**,禁止提取为 workspace crate。 +- 已提取 crate 的重新导出文件(`src/symbol_links.rs` 等)**禁止添加新代码**——顶部有 `RE-EXPORT ONLY` 注释作为守卫。 +- 子 crate 的依赖版本必须与 workspace 统一,禁止独立 bump。 + +**Fitness Function**: +```bash +# 扫描所有 src/*.rs,统计 crate:: 引用数 +for f in src/*.rs; do + count=$(grep -c 'crate::' "$f") + if [ "$count" -gt 15 ]; then + echo "HIGH COUPLING: $f ($count refs)" + fi +done +# 预期输出:空(或仅已标记的高耦合文件如 mcp/tools/repo.rs) +``` + +### RF-6: 生产代码无 panic(Crash-only Software) + +**理论锚定**:Rust 的 `Result` 类型将错误显式化;`unwrap` 是将运行时崩溃隐藏在类型系统背后(参考:Joe Armstrong — Let it crash,但 Rust 中崩溃 = 进程终止,不可接受)。 + +**规则**: +- 生产代码(`src/**/*.rs` 中不在 `#[cfg(test)]` 块内的代码)禁止 `unwrap()`、`expect()`、`panic!()`。 +- 测试代码不受此限,但鼓励使用 `?` 传播。 + +**Fitness Function**: +```bash +# 生产代码 unwrap 计数(排除 #[cfg(test)] 块及 tests.rs 文件) +for f in $(find src -name "*.rs"); do + test_line=$(grep -n "#\[cfg(test)\]" "$f" | head -1 | cut -d: -f1) + if echo "$f" | grep -qE "tests?\.rs$|_test\.rs$|/tests/"; then continue; fi + if [ -n "$test_line" ]; then + head -n "$((test_line - 1))" "$f" | grep -n "\.unwrap()" + else + grep -n "\.unwrap()" "$f" + fi +done +# 预期输出:空 +``` + +**状态**:🟢 **已完成**(v0.20.1 复核:生产代码 unwrap = 0;此前 1090 为测试模块误统计)。 + +### 架构治理框架(Architecture Governance) + +> 参考:外部架构治理方法论(Kimi 会话 `e9f2965f-b949-46a5-9d7c-afd6d4d9232c`) + +**已制度化实践**: + +| 实践 | devbase 落地形式 | 文档位置 | +|------|-----------------|---------| +| ADR(架构决策记录) | ADR-001(单 crate defer)、ADR-002(batch encoding 回滚) | [`docs/architecture/adr-template.md`](docs/architecture/adr-template.md) | +| 不变量清单(Invariants) | RF-1~RF-7 + 分层模块约束(T01–T12) | [`docs/architecture/invariants.md`](docs/architecture/invariants.md) | +| 模块提取演习 | RF-7 的 5 个 `crate::` 引用阈值 + 已提取 18 workspace crates | 本文件 §RF-7 | +| 三层摘要 | `crates/*/README.md` 要求:一句话 + 一页纸 + 深度链接 | 各 crate README | +| 定期架构回顾 | 每次 Wave 结束时的架构审计(见 `docs/_audit/`) | 
`docs/_audit/2026-04-26-*.md` | + +**待增强**: +- 三层摘要:部分已提取 crate 的 README 尚未达到"一页纸"标准 +- 定期架构回顾:当前按 Wave(功能迭代周期)触发,建议每 2–4 周增加一次纯架构 review(不看 feature 进度,只看不变量违反和隐式依赖) + +--- + +## 禁止事项 + +- 不得修改 `dev\third_party\*` 外部仓库 +- 不得在没有迁移逻辑的情况下修改 registry schema +- 不得引入已 deprecated 的协议 +- **不得在主仓库引入 Spark/Flink 依赖**(研究性质代码必须置于独立仓库,保持主仓库轻量) +- **不得在任何源码文件中硬编码真实 token、api_key 或密码**(包括注释和测试数据) + +> 完整版(含历史记录、路线图、详细讨论):见 docs/AGENTS-full.md diff --git a/CHANGELOG.md b/CHANGELOG.md index c5a395e..0f26b8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,692 +1,694 @@ -# Changelog - -All notable changes to this project will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## [Unreleased] - -### Added - -- **FTS5 技能全文搜索** (Schema v35) — `skills_fts` 虚拟表 + 触发器,`search_skills_text()` 使用 BM25 排序(name=1.0, desc=0.8, tags=0.4, category=0.2),LIKE 降级 fallback -- **可插拔外部技能源** (Schema v36) — `SkillSource` trait + `GitHubSource` / `LocalFileSource`,`sync_sources` / `sync_log` 审计表,`devkit_skill_sync` MCP 工具(Beta tier),`devbase skill import` CLI 子命令 -- MCP 工具数: 69 → **70**(5 stable + 61 beta + 4 experimental) -- `devkit_document_convert` — Experimental tier MCP tool,PDF/PPTX → Markdown 转换(`pdftotext` / `python-pptx` 流水线),含 frontmatter 质量标注 -- Stable 工具 invocation 测试补全:`devkit_query_repos`、`devkit_vault_search`、`devkit_vault_read`、`devkit_status`、`devkit_workflow_list`、`devkit_index` -- `seed_repo()` 轻量测试 helper(仅插入 `entities` 表,无副作用) - -### Fixed - -- `mcp/tools/document_convert.rs` 原始字符串定界符修复(`r###"` 避免与 Python f-string `"##` 冲突) -- `cleanup_extracted_text` 单元测试期望值与实现语义对齐(保留最多 2 个连续空行) - -### Changed - -- **Workspace crate 架构重组** — 消除机械提取造成的微 crate 碎片 - - 合并 8 个 `devbase-registry-*` 微 crate(100–300 行/个)为统一 `devbase-registry`,含 8 个语义子模块(`entity`, `health`, `metrics`, `relation`, `call_graph`, `code_symbols`, `dead_code`, `workspace`) - - 
拆分 10+ 个 monolithic `lib.rs` 为域驱动子模块:`devbase-embedding` (`candle`/`ollama`), `devbase-workflow-model` (`definition`/`execution`/`step_type`), `devbase-symbol-links` (`similarity`/`co_located`), `devbase-sync-protocol` (`index`/`version_vector`), `devbase-skill-runtime-types` (`skill_type`/`execution`/`params`), `devbase-skill-runtime-parser` (`frontmatter`/`field_parsers`), `devbase-workflow-interpolate` (`resolver`), `devbase-vault-frontmatter` (`parser`), `devbase-vault-wikilink` (`parser`), `devbase-core-types` (`node_type`/`node`/`edge`) - - 全 workspace `Cargo.toml` 统一使用 `[workspace.package]` 继承(`version`, `edition`, `authors`, `license`, `repository`) -- `KNOWN_ISSUES.md` 更新:document_convert 从 P3 债务移至已解决归档;测试计数 485→494 -- `docs/reference/mcp-tools.md` 修正为 69 个工具,补充 Index / Workflow / Relation / KnownLimit / Session 分类 -- `docs/reference/stable-tools/README.md` 修正为 5 个 Stable 工具(删除过时的 `project_brief.md` / `hybrid_search.md` / `session_recall.md`) - -## [0.20.1] - 2026-05-17 - -### Added - -- **Phase 1 Production Hardening** - - Workflow E2E 测试 — `src/mcp/tools/workflow.rs`:DAG 成功执行、失败传播验证 - - RF-7 路径隐私脱敏 — `sanitize_path()` 自动掩码 home 目录为 `~` - - Tantivy 一致性修复 — `repair_tantivy_consistency_at()` 启动时自动检测 orphan/missing 文档 - - 性能回归基线 — `test_keyword_search_latency_regression_1k` / `_10k`(profile-aware 阈值) - - `TempStorageBackend` — 测试隔离后端,消除 `DEVBASE_DATA_DIR` 竞态 -- **Architecture Invariants CI 自动化** — `scripts/invariant-checks/run-checks.ps1` - - G5 (RF-6):diff-only 检测新增生产代码 `unwrap`/`expect`/`panic`(排除 `#[cfg(test)]`) - - T11:`mcp/tools` 禁止直接调用 `rusqlite::Connection` - - T12:`tui/render` 纯消费检查(禁止写入操作) - -### Fixed - -- `AppContext::with_storage()` 使用实际 storage backend 的 `index_path()` 而非硬编码默认值 -- G5 invariant checker 正则修复:`tests.rs` 文件正确跳过 -- `Cargo.lock` 同步版本 bump(修复 `--locked` release 构建失败) -- 平台相关测试隔离:`C:\` 路径断言加 `#[cfg(windows)]`,Linux `python3` 断言适配 -- HuggingFace 网络依赖测试加 `#[ignore]`(避免 CI TLS 证书失败) - -## [0.20.0] - 2026-05-16 - -### Added - -- 
**知识完备性**:Vault 双向链接图遍历(BFS depth 1-3)+ `[[note#heading]]` block 引用 -- **Vault 笔记历史追踪** — Git-based blob diff,`devkit_vault_history` tool -- **混合检索质量监控** — `HybridSearchMetrics`(latency/recall/overlap/keyword_source) -- **性能回归基线** — Criterion benchmarks:`index_repo_full`、`cosine_similarity`、`extract_symbols` -- **客户端无关原则** — `StorageBackend` trait 完整实现,解耦 `dirs::data_local_dir()` 硬编码 -- **MCP Tools +4** (68 total) - - `devkit_vault_history`, `devkit_vault_export`, `devkit_vault_graph`, `devkit_vault_daily` - -### Changed - -- 20+ 独立 crate 零循环依赖,workspace 拆分完成 -- `entities` 表成为唯一真相源,`repos` 表彻底删除 -- Tantivy / SQLite 补偿扫描:启动时自动同步 orphan 文档 - -## [0.19.0] - 2026-05-14 - -### Added - -- **SQLite WAL 模式** — `r2d2` 连接池 + WAL journal,并发安全与增量备份 -- **Tantivy 健康评分** — `devkit_index_health`:损坏检测、自动重建、孤儿文档清理 -- **Vault 导出** — `devkit_vault_export`:Obsidian-compatible Markdown 批量导出 -- **Redis ADR 决策** — `docs/architecture/adr-003-redis.md`:评估后决定保持 SQLite 优先 -- **OpLog 审计追踪** — 结构化事件类型 `OplogEventType`,全操作不可变日志 - -### Changed - -- Schema 迁移前自动生成 `backup-YYYYMMDD-HHMMSS.db` 快照 -- 索引层反向一致性扫描与自动修复能力 - -## [0.18.0] - 2026-05-13 - -### Added - -- **ClaudeCode 工作流集成** — `docs/RFC/claudecode-workflow-integration.md` - - `devkit_project_brief` — 生成项目 Markdown 简报(架构 + 模块 + 近期提交 + 已知约束),用于 `.claude/CLAUDE.md` 注入 - - `devkit_impact_analysis` — 符号级变更影响半径分析(BFS 调用图遍历 + 相关符号发现 + 测试启发式 + 历史 oplog) - - `scripts/devbase-claude.ps1` — PowerShell 一键启动器:自动检测 repo → 生成简报 → 注入 `.claude/CLAUDE.md` → 启动 `claude` → 可选捕获退出 diff -- **Session 导入/导出工具** - - `devkit_session_export` — 导出会话为 Markdown / JSON;支持记忆类型图标与元数据 - - `devkit_session_import` — 从 bulk text 批量导入记忆(`[type]` 前缀解析) -- **MCP Tools +4** (64 total) - - `devkit_project_brief`, `devkit_impact_analysis`, `devkit_session_export`, `devkit_session_import` -- **TUI Session 视图硬化** - - 三态 MainView 切换:`RepoList → VaultList → Session`(`Tab` 键循环) - - Session 列表:状态图标(● active / ◌ archived)+ 高亮样式 - - Session 详情:记忆类型图标(◆ decision / ▪ constraint / ★ discovery / 
✗ error)+ embedding model 标签 + indexed 状态 -- **AGENTS.md** 同步至 v0.18.0-dev 基线(64 Tools / 437 tests) - -### Changed - -- `src/mcp/mod.rs` Tool 注册表扩展至 64 工具(稳定 + Beta) -- `src/mcp/tests.rs` 工具计数断言同步 -- TUI `render_session.rs` / `state/mod.rs` 适配 Schema v34 记忆字段(`embedding_model`, `indexed_at`) - -## [0.17.0] - 2026-05-13 - -### Added - -- **Agent Memory 向量存储** — Schema v34 - - `agent_memories` 新增 `embedding BLOB`, `embedding_model TEXT`, `indexed_at DATETIME` - - Partial index `idx_agent_memories_embedding` 仅索引含向量的行 - - `AgentMemory` 结构体扩展向量元数据字段 -- **SQLite UDF: `cosine_similarity`** — `src/registry/agent_context.rs` - - 输入: 两个 little-endian f32 BLOB - - 输出: REAL ∈ [-1.0, 1.0] - - 注册时机: `WorkspaceRegistry::init_db_at` 迁移完成后自动注册 -- **语义记忆搜索** — `search_memories_semantic(context_id, query_embedding, limit)` - - 纯 SQL `ORDER BY cosine_similarity(embedding, ?) DESC` - - 零 LLM 运行时依赖;仅执行向量比对 -- **MCP Tools +2** (60 total) - - `devkit_session_recall` — 外部向量查询 + 语义召回 top-k memories - - `devkit_session_index` — 为已有 memory 注入外部生成 embedding -- **Skill Runtime Auto-Recall** — `src/skill_runtime/executor.rs` - - Tier 1: Semantic recall (本地 Candle/Ollama 或外部 HTTP endpoint) - - Tier 2: Keyword fallback (`LIKE` search on `content`) - - 新环境变量: `DEVBASE_CONTEXT_MEMORY_COUNT`, `DEVBASE_CONTEXT_RECALL_METHOD` - - `DEVBASE_CONTEXT_MEMORIES` 升级为 top-k 相关 memories(含 `score` + `model`) -- **外部 Embedding Provider 集成** - - `call_external_embedding_endpoint` — `reqwest::blocking` POST `/api/embeddings` - - 配置驱动: `config.toml [embedding]` (enabled/provider/model/base_url/timeout) - - 端到端测试: mock TCP server 验证 Ollama 格式解析 + 错误码处理 -- **RFC 文档** — `docs/RFC/agent-memory-vector-storage.md` - - 架构决策: devbase = 向量数据库层,不做 embedding 生成 - - 参照 pgvector 边界设计 - -### Changed - -- **Feature Flags**: `embedding` 从 `default` 移除 - - Candle/Ollama 依赖变为 opt-in: `--features embedding` - - 默认构建零 ML 依赖,编译时间减少 30~50% -- `insert_memory` 签名扩展: 新增可选 `embedding: Option<&[f32]>` 和 `embedding_model: Option<&str>` -- 
`list_memories` / `search_memories` SELECT 语句扩展为 8 列(兼容新增字段) -- AGENTS.md 同步至 v0.17.0-dev 基线 - -### Breaking Changes - -- 默认构建不再包含 `devbase-embedding` crate;需要语义生成能力的用户须显式启用 `--features embedding` -- `generate_query_embedding` 在默认构建下返回错误(提示启用 feature 或配置外部 endpoint) - -## [0.16.1] - 2026-05-13 - -### Added - -- **Workflow-Session Binding** — Schema v33 - - `workflow_executions` 新增 `context_id` 列 + 索引 - - `create_execution` 自动绑定 `resolve_active_context()` - - MCP `devkit_workflow_run` 与 CLI `workflow run` 均支持自动绑定 - - `devkit_session_workflows` tool: 列出指定 context 的 workflow 执行历史 -- `context_entity_links` 表 (Schema v32): context 与任意 entity 的多对多关联 - -## [0.16.0] - 2026-05-13 - -### Added - -- **Agent Contexts (AI Agent OS)** — Schema v31 - - `agent_contexts` 表: 持久化 AI session / project scope - - `agent_memories` 表: 结构化记忆(decision/constraint/note/discovery/error) - - 9 个 Session MCP tools: save/list/resume/attach/detach/activate/search/capture/workflows - - `resolve_active_context()`: 环境变量 `DEVBASE_ACTIVE_CONTEXT` → 文件 `.active_context` fallback - - Context-aware Skill Runtime: 注入 `DEVBASE_ACTIVE_CONTEXT` + `DEVBASE_CONTEXT_MEMORIES` + `DEVBASE_CONTEXT_LINKS` - - 所有 agent_context 操作自动写入 OpLog (`OplogEventType::AgentContext`) - -## [0.15.0] - 2026-05-04 - -### Added - -- **P1: Tantivy BM25 代码符号搜索** — `search/symbol_index.rs` - - 独立 Schema (`repo_id`, `name`, `signature`, `file_path`, `line_start`) - - `keyword_search_symbols` 主路径走 Tantivy BM25,SQLite LIKE 回退 - - 索引流程 `index.rs` 自动同步写入 symbol_index - - `StorageBackend` 扩展 `symbol_index_path()`(6 实现) -- **P3: Embedding 多后端** — Candle (默认) + Ollama (配置切换) - - 新增 `OllamaProvider` (`ureq` HTTP `/api/embed`) - - `create_provider(backend, model, base_url, timeout)` 配置化创建 - - `generate_query_embedding` 通过 `OnceLock` 懒加载配置化 provider - - 默认模型改为 `all-minilm` (384-dim,与 Candle 维度兼容) -- **P4: Health 环境检测扩展** — `EnvVersionCache` 从 5 工具 → 9 工具 - - 新增: `python`, `bun`, `zig`, `java` - - `get_tool_version` 支持 stderr fallback (Java 输出到 
stderr) - - `fmt_version` 改进: Java 引号提取、Docker/Python 格式处理 -- **P5: 架构不变量自动化 CI** — `scripts/invariant-checks/run-checks.ps1` - - G5: diff-only 检测新增生产代码 unwrap/expect/panic(排除 `#[cfg(test)]`) - - T11: 检测 `mcp/tools/*` 直接调用 `rusqlite::Connection` - - T12: 检测 `tui/render/*` 写入操作 - - CI job `invariant-check` 加入 `.github/workflows/ci.yml` -- **P2 Phase 1: AppContext 职责拆分** — 6 个 Client trait impl 迁出 `storage.rs` - - `scan.rs` / `health.rs` / `sync.rs` / `digest.rs` / `knowledge_engine/mod.rs` / `registry.rs` - - `storage.rs` 860 → 430 行 (-50%) - - 删除冗余 `conn_mut()` -- **P2 Phase 2: 内联 SQL 下沉** — 新增 `registry/code_symbols.rs` + `registry/dead_code.rs` - - `CodeSymbolRow` / `DeadCodeRow` + 纯函数查询 (12 个单元测试) - - `RegistryClient` 退化为纯代理层 - -### Changed - -- `EmbeddingConfig` 默认模型 `nomic-embed-text` → `all-minilm` (384-dim) -- AGENTS.md 阶段描述更新: v0.14.3 → v0.15.0 推进中 → v0.15.0 全部完成 - -### Fixed - -- **TTL 缓存负值 bug** (`97172ec`): `elapsed < ttl_seconds` → `elapsed >= 0 && elapsed < ttl_seconds` - - 防止系统时间回溯导致缓存永不过期 -- `crates/devbase-embedding/src/lib.rs` 遗留 unwrap 清零 (`encode_with_candle` → `ok_or`) - -## [0.14.3] - 2026-05-05 - -### Added - -- **Schema v30** — `code_symbols.attributes` 列,tree-sitter 提取 `#[test]`/`#[tokio::test]` 等属性 - - `devkit_dead_code` 自动过滤测试函数,消除假阳性 - - `rust_node_to_symbol` 支持 `prev_sibling()` 回溯收集属性节点 -- **Tantivy/SQLite 补偿扫描** — 启动时自动检测并清理 orphan 文档 - - 新增 `search::sync_index_to_db(conn)`,对比 Tantivy `list_indexed_repo_ids()` 与 SQLite `entities` - - `AppContext` 初始化后自动调用,失败仅 warn 不阻塞启动 -- **Feature flags** — `mcp` + `embedding`,支持 `--no-default-features` 最小化编译 - - `default = ["tui", "mcp", "embedding"]` - - `devbase-embedding` 设为 `optional = true` - - 新增 `src/clients.rs` 提取 MCP client traits,避免 mcp feature 关闭时 trait 不可用 -- **Kimi CLI MCP 集成文档** — AGENTS.md 新增 Kimi CLI 集成状态,项目级 skill 位于 `.kimi/skills/devbase-project/` - -### Changed - -- **RF-1 架构红线** — `init_db()` 全局路径残留清零 - - `init_db()` 标记 `#[deprecated]`,新增 `init_db_with(backend: &dyn 
StorageBackend)` - - `workflow/executor.rs`、`workflow/state.rs`、`storage.rs` 全部改为注入式 - - `examples/` + `benches/` 中额外 5 处残留同步修复 -- `index_repo_full` 合并用户 `scan.exclude_patterns` 与默认排除模式 -- `cargo fmt` + `cargo clippy --fix` 全量格式化(8 文件,6 处 warning 修复) -- `CONTRIBUTING.md` 新增 sccache 构建加速指南 - -### Fixed - -- `cargo clippy --all-targets -D warnings` — 7 warnings → 0 -- `cargo fmt --check` — 全量通过 - -## [0.14.2] - 2026-05-02 - -### Changed - -- health dirty 检测修复(排除 ignored 文件) -- scan 路径规范化 + syncthing-rust 识别修复 -- experiment_log / CodeMetrics / ModuleGraph / CallGraph / DeadCode 提升为 Beta tier -- 48 tools: Stable 5 / Beta 40 / Experimental 3 - -## [0.14.1] - 2026-05-01 - -### Added - -- CLI JSON 输出补全 (`--json` / `--recalc`) -- relations MCP 工具加固 -- License headers 全量补录 -- Vault Daily / Vault Graph MCP tools - -## [0.14.0] - 2026-04-28 - -### Added - -- Workspace 拆分:6 个零耦合 crate 提取 -- MCP trait 化:`mcp/tools/repo.rs` `crate::` 引用 68→41 - -## [0.13.0] - 2026-04-26 - -### Added - -- Registry God Object 拆解:10 子模块提取为 free function -- `WorkspaceRegistry` 退化为纯 facade - -## [0.12.0] - 2026-04-30 - -### Added - -- **Schema v22** — drop `vault_notes`, `papers`, `workflows` orphan tables; `entities` becomes sole source of truth for all entity types -- **Managed-Gate Fail-Safe Defaults** — `devbase sync` defaults to managed repos only - - Management tags: `mirror`, `reference`, `third-party`, `collaborative`, `team`, `own-project`, `tool`, `active`, `managed` - - Untagged / non-management repos are registered but skipped by default sync - - `--filter-tags` bypasses the gate for explicit selection -- **`.devbase-ignore`** — directory-level opt-out exclusion during scan -- `scan --register` no longer auto-tags repos with `"discovered"` -- i18n hint for unmanaged repos - -### Changed - -- `inspect_repo`: remove `"discovered"` from default tags; `-main`/`-master` repos keep `zip-snapshot` + `needs-migration` -- `collect_tasks`: default mode filters by management tags -- All 
`list_workflows` / `list_papers` / `list_vault_notes` queries migrated to `entities` table + `json_extract` -- Generic `upsert_entity` abstraction for entity dual-write -- `ENTITY_TYPE_*` constants extracted across 10 files (~25 replacements) -- `cargo test --lib`: 374 → 379 passed - -### Breaking Changes - -- Existing repos tagged `"discovered"` are **no longer synced by default**. - Use `devbase tag managed` (or any management tag) to opt a repo into automatic sync. - -## [0.10.0] - 2026-04-26 - -### Added - -- **L3 Risk Layer MVP** — `known_limits` 表 + Registry CRUD + MCP tools + CLI subcommand - - Schema v18: `known_limits` 表(id, category, description, source, severity, first_seen_at, last_checked_at, mitigated) - - Registry CRUD: `save`/`get`/`list`/`delete`/`resolve`/`seed_hard_vetoes` - - MCP tools: `devkit_known_limit_store` / `devkit_known_limit_list`(Beta tier) - - CLI: `devbase limit {add,list,resolve,delete,seed}` - - OpLog 集成: create/update/resolve/delete/seed 自动写入 oplog(event_type = `KnownLimit`) - - Hard Veto 种子: AGENTS.md 中的 5 条硬约束自动填充 -- **L4 元认知层 MVP** — `knowledge_meta` 表 + L3-L4 联动 - - Schema v19: `knowledge_meta` 表(id, target_level, target_id, correction_type, correction_json, confidence, created_at) - - Registry CRUD: `save`/`get`/`list`/`delete` - - CLI 联动: `devbase limit resolve --reason "..."` 自动创建 L4 meta 记录 -- **Hard Veto 运行时守卫** — Skill 执行前自动检查未解决 hard veto - - `skill_runtime::executor::run_skill` 执行前查询 `known_limits` - - 未解决 hard veto 存在时,警告注入 `stderr`,同时写入 OpLog - - 零破坏性:skill 仍执行成功,但输出中包含 `[HARD-VETO-WARNING]` - -### Changed - -- `cargo test --all-targets`: 279 → 288 passed -- MCP tool 总数: 35 → 37 - -## [0.11.3] - 2026-04-26 - -### Changed - -- **Phase 1 主从表切换 — Stage 3 完成**(`repos` 表删除) - - `save_repo` / `update_repo_*` / `run_clean` 不再写入 `repos` - - Schema v21 迁移:重建 11 个子表(去 FK)→ 删除 `repos` 表 - - `test_helpers.rs` SCHEMA_DDL 同步去 `repos` + 去 FK - - `entities` 成为真正的读写唯一数据源 - -## [0.11.2] - 2026-04-26 - -### Changed - -- **Phase 1 
主从表切换 — Stage 2 完成**(读路径迁移) - - `list_repos` / `list_repos_stale_health` / `list_repos_need_index` / `list_workspaces_by_tier` 全部改为从 `entities` 读取(`json_extract`) - - `digest.rs` / `health.rs` / `daemon.rs` / `backup.rs` / `knowledge_engine.rs` / `sync/*.rs` / `tui/state.rs` / `mcp/tools/repo.rs` 等所有 `list_repos()` 调用方自动迁移 - - 直接 SQL 查询迁移:`dependency_graph.rs`, `registry/links.rs`, `registry/knowledge.rs`, `query.rs`, `oplog_analytics.rs`, `commands/simple.rs` - - `update_entity_metadata_field` 修复 `json_set` 字符串引号问题:原始字符串直接传递,`"null"` 时自动 `json_remove` - - `repo_tags` / `repo_remotes` 子表保留,通过 `repo_id` JOIN 读取(FK 仍指向 `repos`) - -## [0.11.1] - 2026-04-26 - -### Changed - -- **Phase 1 主从表切换 — Stage 0 完成**(entities 第一公民前置) - - Schema v20: Flat ID 命名空间迁移(`repo:devbase` → `devbase`,`skill:xxx` → `xxx`) - - `sync_repo_to_entities_by_id` 重构为 `upsert_entity_for_repo`:直接由 `RepoEntry` 写入 entities,不再读取 repos - - `update_repo_*` 改为先写 entities metadata(`json_set`),再写 repos - - `save_repo` 写入顺序反转:entities → repos → repo_tags → repo_remotes - - `run_tag` 补全 entities 双写:`sync_repo_tags_to_entity` - - `run_clean` 改为先删 entities,再删 repos(保留 CASCADE 行为) - - Skill entities 同步同理去除 `skill:` 前缀 - -## [0.11.0] - 2026-04-26 - -### Added - -- **AppContext Pool 化** — 全链路数据库连接池统一 - - `AppContext` 持 `r2d2::Pool`,替代单 `Connection` - - `scan`/`health`/`sync`/`backup`/`daemon`/`query` 等深层模块全部迁移 - - `init_db()` 调用点从 89 处降至 5 处合法保留(Pool 前 schema 引导 ×2、migrate 定义 ×1、workflow 测试辅助 ×2) - - 根治 `spawn_blocking` / `thread::spawn` 闭包无法传递裸 `Connection` 的问题 -- **MCP 测试隔离** — 全部 MCP 集成测试改用临时目录 - - `DEVBASE_DATA_DIR` 指向 `tempfile::TempDir` + `AppContext::with_defaults()` - - 多线程并发测试全部通过,无 flaky -- **Search 测试竞态自愈** — `SEARCH_TEST_LOCK` + 临时目录隔离,多线程 (`--test-threads=4`) 稳定通过 - -### Changed - -- `cargo test --all-targets`: 288 → 374 passed(+86 个新增/迁移测试) -- CI 测试并行度: `--test-threads=1` → `--test-threads=4`,回归测试耗时 ~13s → ~4s -- `rusqlite` 0.34 + `r2d2_sqlite` 0.27.0 版本锁定 - -## [0.9.0] - 2026-04-26 - -### Added - -- 
**Workflow Loop Step 完整执行** — 5 种 step 类型全部可执行 - - `StepType::Loop { for_each, body }`:遍历集合,执行 body 子步骤 - - 变量插值:`${loop.item}` / `${loop.index}` - - 结果聚合:stdout 按迭代索引标记,outputs 合并 - - 失败处理:单迭代失败按 body step 的 `on_error` 策略处理 -- **12 个新增单元测试** — model/interpolate/validator/executor 全覆盖 - -### Changed - -- `cargo test --all-targets`:267 → 279 passed - -## [0.8.0] - 2026-04-25 - -### Added - -- **Workflow 子类型执行** — Subworkflow / Parallel / Condition 全部可执行 - - `execute_subworkflow_step`:递归调用 `execute_workflow` - - `execute_parallel_step`:子步骤串行执行 + 结果聚合 - - `execute_condition_step`:字符串插值后 true/false 评估 -- **NLQ 自然语言查询结果可执行** — TUI `[:]` 搜索结果按 Enter 直接运行 skill -- **NLQ smoke test** — `run_nlp_selected_skill` 空列表/无技能/执行管道测试 -- **TUI SkillPanel 拆分** — `SkillPanelState` 提取 7 个字段,App 51→44 字段 - -### Fixed - -- 29 个生产代码 unwrap 全部清零 -- 30 个 clippy 警告清零 - -## [0.7.0] - 2026-04-20 - -### Added - -- **NLQ 自然语言查询** — TUI `[:]` 触发 embedding 语义搜索,fallback 降级文本搜索 -- **智能同步建议** — `sync/policy.rs::recommend_sync_action` 基于 safety/ahead/behind 生成建议 - -## [0.6.0] - 2026-04-18 - -### Added - -- **Mind Market 评分系统** — `skill_runtime::scoring` - - `success_rate` + `usage_count` + `rating`(0-5 分公式) - - CLI:`skill recalc-scores` / `skill top` / `skill recommend` -- **TUI Workflow 执行** — `[w]` 详情页 `r/Enter` 运行 + 结果弹窗 - -## [0.5.0] - 2026-04-17 - -### Added - -- **Workflow Engine v0.5.0** — YAML 编排多步骤自动化 - - 5 种 step 类型:skill / subworkflow / parallel / condition / loop - - 拓扑调度(Kahn 算法)+ batch 并行执行 - - 变量插值:`${inputs.x}` / `${steps.y.outputs.z}` - - 错误策略:Fail / Continue / Retry / Fallback - - Schema v17:`workflows` + `workflow_executions` 表 -- **CLI/TUI Workflow 集成** — `devbase workflow {list,show,register,run,delete}` + `[w]` 面板 - -## [0.4.0] - 2026-04-15 - -### Added - -- **Schema v16 统一实体模型** — `entity_types` + `entities` + `relations` 表,渐进双写 -- **Skill 自动封装** — `devbase skill discover ` 自动分析项目 CLI/API,生成 SKILL.md -- **Git URL Discover** — `devbase skill discover https://github.com/...` 
克隆+分析+注册 -- **MCP `devkit_skill_discover`** — 35 tools 总数 - -## [0.3.0] - 2026-04-12 - -### Added - -- **34 MCP tools 全量通过 MCP Inspector** -- **README Quick Start 三步内跑通** -- **CI/CD** — `.github/workflows/ci.yml`(check / test / fmt / clippy on Windows) -- **GitHub Release 预编译二进制** - -## [0.2.4] - 2026-04-20 - -### Architecture - -- **Outboard Brain Embedding Architecture** — Embedding generation moved to external Skill/MCP Server - - `embedding.rs` stripped of Ollama/OpenAI generation logic; storage protocol only (`embedding_to_bytes`, `bytes_to_embedding`, `cosine_similarity`) - - `knowledge_engine.rs` no longer generates embeddings during indexing - - Aligns with "store + search in devbase, compute in Clarity/Skill" boundary - -### Changed - -- **Breaking** — `devkit_semantic_search` now accepts `query_embedding: number[]` instead of `query: string` - - Embedding generation is the caller's responsibility (external MCP Server or Skill) - - Removed `config.embedding.enabled` gate; search works as long as embeddings exist in DB - -### Added - -- **`devkit_embedding_store`** — Store externally-generated embedding vectors into SQLite - - Parameters: `repo_id`, `symbol_name`, `embedding: number[]` - - Upsert semantics (ON CONFLICT UPDATE) -- **`devkit_embedding_search`** — Alias for `devkit_semantic_search` with vector-based interface - - Same parameters and behavior, alternative name for workflow clarity -- **MCP tool count**: 25 → 31 - -## [0.2.4] - 2026-04-20 (continued) - -### Added - -- **`devkit_hybrid_search`** — Hybrid vector + keyword search via RRF merge (Beta) - - `search::hybrid.rs`: `rrf_merge()` (Reciprocal Rank Fusion, k=60), `keyword_search_symbols()` (SQLite LIKE on name/signature), `hybrid_search_symbols()` (auto-fallback to keyword when embedding missing) - - `registry::knowledge::hybrid_search_symbols()` wrapper - - Recommended default search tool for code concept discovery -- **`devkit_cross_repo_search`** — Cross-repository symbol search filtered 
by tags (Beta) - - `registry::knowledge::cross_repo_search_symbols()`: INTERSECT-based tag filtering (AND semantics), per-repo hybrid search, global dedup+sort - - Searches all repos matching ALL specified tags -- **`devkit_knowledge_report`** — Workspace knowledge coverage report (Beta) - - `src/oplog_analytics.rs`: `generate_report()` with table-existence guards for resilient querying - - Reports: repo_count, total_symbols, total_embeddings, total_calls, coverage_pct, per-repo breakdown, health_summary, recent_activity -- **`devkit_related_symbols`** — Explicit symbol-to-symbol knowledge links (Experimental) - - Schema v13: `code_symbol_links` table (source_repo, source_symbol, target_repo, target_symbol, link_type, strength) - - `src/symbol_links.rs`: `compute_similar_signature_links()` (Jaccard token overlap), `compute_co_located_links()` (same-file clustering) - - `generate_and_save_links()`: persists links with ON CONFLICT IGNORE upsert -- **External Embedding Provider** — Reference Python implementation in `examples/embedding-provider/` - - `index.py`: Ollama `/api/embeddings` client, batch generation, cross-platform registry DB path - - Byte-compatible f32 little-endian serialization via `struct.pack` - - CLI: `--repo-id`, `--model`, `--ollama-url`, `--batch-size`, `--force` -- **Schema v13** — `code_symbol_links` table for explicit conceptual relationships - -### Engineering - -- **Context Safety Mechanism** — Formalized as long-term architecture principle - - Sub-agent execution: serial + commit-isolated work directories (prevents compilation races) - - MCP tool idempotency: all state-mutating tools use ON CONFLICT UPDATE / transaction boundaries - - OpLog as immutable audit trail for all state transitions - ---- - -## [0.2.3] - 2026-04-20 - -### Added - -- **Semantic Vector Search (Wave 1)** — Cosine-similarity code symbol search - - `code_embeddings` table (Schema v11): `repo_id + symbol_name` PK, BLOB embedding, `generated_at` - - `embedding.rs`: 
Ollama/OpenAI-compatible generation + `cosine_similarity` + byte serialization - - `devkit_semantic_search` MCP tool (Beta): natural-language → embedding → top-K symbols -- **Multi-Language Symbol Extraction (Wave 2)** — tree-sitter AST parsing beyond Rust - - `tree-sitter-python`, `tree-sitter-typescript`, `tree-sitter-go` dependencies - - `SymbolType` expanded: Function, Struct, Enum, Trait, Impl, Module, Class, Interface, TypeAlias, Constant, Static - - Per-language call-target resolvers for Call Graph construction - - Languages supported: Rust, Python, JavaScript, TypeScript, Go -- **Call Graph Analysis** — Intra-repo function call relationship extraction - - `code_call_graph` table (Schema v10): caller → callee edges with line numbers - - `devkit_call_graph` MCP tool: "Who calls `register_tool`?" -- **Cross-Repo Dependency Graph expansion** - - `CMakeLists.txt` parsing: `find_package`, `add_subdirectory`, `FetchContent_Declare`, `target_link_libraries` - - `ManifestKind::CMake` added to dependency graph builder -- **Dead Code Detection** — `devkit_dead_code` MCP tool (Experimental) - - SQL `NOT EXISTS` query over call graph to find functions with zero incoming edges - - `LIKE 'pub%fn%'` heuristic to exclude non-public functions -- **arXiv Integration** — Pure string-parsing Atom XML fetcher (zero heavy XML deps) - - `arxiv.rs`: `PaperMetadata` with title/authors/summary/published/category - - `devkit_arxiv_fetch` MCP tool (Beta): fetch by arXiv ID -- **Performance Benchmarks** — Criterion suite (`benches/semantic_index.rs`) - - `index_repo_full` (small/medium/full parameterization) - - `cosine_similarity` (128/512/768 dims) - - `extract_symbols` (Rust/Python/Go comparison) - - `parse_cmake_lists` (CMake parsing) -- **Structured OpLog (Schema v12)** — Typed event system - - `OplogEventType` enum replacing free-text `operation` field - - JSON metadata + `duration_ms` for observability - - Migration: `CASE` mapping from legacy strings to enum variants - -### 
Fixed - -- **`scan` async panic** — `fetch_github_stars` now runs in `std::thread::spawn` isolation - - Prevents `reqwest::blocking::Client` drop inside tokio runtime from causing panic - - `block_on_async()` helper detects runtime context and uses `mpsc` or temporary runtime -- **Dead code false positives** — `pub fn` → `pub%fn%` SQL LIKE match covers `pub async fn` / `pub(crate) fn` / `pub unsafe fn` - - Excludes `main()` from dead code results -- **Clippy warnings** — 12+ lints resolved (`manual_strip`, `collapsible_if`, `FromStr`, `type_complexity`, `useless_format`, etc.) - -### Changed - -- **`nl_filter_repos`** — Now uses Tantivy full-text search as primary path - - Falls back to structured SQL filtering when Tantivy is unavailable - ---- - -## [0.2.2] - 2026-04-21 - -### Added - -- **Vault Backlinks** — Find notes that link to a given note - - `vault::backlinks:` query prefix - - TUI detail panel shows "被引用" section with backlink count and list - - MCP tool `devkit_vault_backlinks` — AI can discover note relationships - - `vault/backlinks.rs` with `build_backlink_index()` and `get_backlinks()` - -### Changed - -- **Schema v8** — `vault_notes` table no longer has `content` column - - Migration: auto-creates `vault_notes_v2`, migrates data, drops old table - - `save_vault_note` / `list_vault_notes` SQL updated to 8 columns - - Filesystem-first architecture now complete at the database level - -## [0.2.1] - 2026-04-20 - -### Added - -- **Vault Watch** — Filesystem watcher for `workspace/vault/` - - Auto-refresh TUI vault list when notes are edited externally - - 500ms debounce to avoid excessive reloads -- **Vault Tantivy Search** — `vault:` queries now use Tantivy full-text index - - Replaces slow SQLite LIKE + per-file reading - - Supports keyword scoring and ranking -- **MCP Registry Manifest** — `server.json` for official MCP Registry submission - -### Changed - -- `query.rs` vault branch: uses `search_vault()` instead of in-memory filtering - -## [0.2.0] 
- 2026-04-20 - -### Added - -- **Vault System** — Markdown note management with Obsidian-compatible PARA structure - - `vault/` directory with PARA folders: 00-Inbox, 01-Projects, 02-Areas, 03-Resources, 04-Archives, 99-Meta - - Filesystem-first architecture: note content lives in `.md` files, SQLite only indexes metadata - - YAML frontmatter parsing (title, tags, aliases, date) - - WikiLink `[[...]]` extraction and backlink index building -- **TUI Vault View** — Press `Tab` to switch between Repo list and Vault note list - - Vault list shows note titles with tag indicators - - Detail panel previews note content (first 20 lines), tags, and outgoing links - - `Enter` opens selected note in VS Code -- **MCP Vault Tools** — 3 new tools for AI Agent vault interaction - - `devkit_vault_search` — full-text search across vault notes - - `devkit_vault_read` — read note content and frontmatter by path - - `devkit_vault_write` — write or append to vault notes -- **P2-lite: repos.toml** — Optional static configuration override for repositories - - Declare tags, tier, and workspace_type in `workspace/repos.toml` - - Overrides are applied on top of auto-discovered repo metadata -- **Unified Node Model** — `core::node::{Node, NodeType, Edge}` abstraction - - `NodeType::GitRepo | VaultNote | Asset | ExternalLink` - - Foundation for future Knowledge Graph unification -- **Workspace Directory** — `%LOCALAPPDATA%/devbase/workspace/` with `vault/` and `assets/` -- **MCP Client Config** — `mcp.json` for Claude Desktop / Cursor integration - -### Changed - -- **Architecture principle**: File system = source of truth; SQLite/Tantivy = derived index/cache -- Vault notes no longer store `content` in SQLite (read from disk on demand) - -## [0.1.0] - 2026-04-20 - -### Added - -- **TUI Dashboard** — Terminal UI for multi-repository workspace management - - Repository list with status icons, stars, and tag indicators - - Detail panel with Overview / Health / Insights tabs - - Stars Trend 
sparkline (30-day history) - - Help Overlay with categorized keyboard shortcuts - - Responsive layout: compact / standard / wide screen modes - - Cross-repository code search (ripgrep + Tantivy dual mode) - - One-key launch into gitui / lazygit -- **MCP Server** — 14 tools for AI Agent integration (stdio transport) - - `devkit_scan`, `devkit_health`, `devkit_sync`, `devkit_query_repos` - - `devkit_code_metrics`, `devkit_module_graph`, `devkit_natural_language_query` - - `devkit_index`, `devkit_query`, `devkit_note`, `devkit_digest` - - `devkit_github_info`, `devkit_paper_index`, `devkit_experiment_log` -- **Safe Sync Engine** — Four-tier sync policies: Mirror / Conservative / Rebase / Merge - - Pre-sync safety assessment (dirty, diverged, detached HEAD detection) - - Dry-run preview with per-repo recommendations - - Async batch sync with concurrency control and timeout -- **Registry & Indexing** — SQLite-backed workspace registry - - Automatic Git + non-Git workspace discovery - - Schema migrations with automatic backup snapshots - - GitHub Stars cache with TTL and historical tracking - - Tantivy full-text index for repository knowledge search -- **Health Monitoring** — Workspace-wide health checks - - Git status tracking (dirty / ahead / behind / diverged) - - Blake3 hash snapshots for non-Git workspaces - - Environment tool version detection -- **i18n** — Chinese and English bilingual support -- **CI/CD** — GitHub Actions workflow for check, test, fmt, clippy on Windows - -### Engineering - -- Modular architecture: 22 crates modules with clear separation of concerns -- Dual lib+bin mode: `lib.rs` exports all modules for programmatic use -- Theme system with semantic color tokens (dark/light ready) -- Render layer split from monolithic 1026-line file into 6 focused submodules - -### Security - -- `cargo audit` clean (0 vulnerabilities in direct dependencies) - -[0.20.1]: https://github.com/juice094/devbase/releases/tag/v0.20.1 -[0.20.0]: 
https://github.com/juice094/devbase/releases/tag/v0.20.0 -[0.19.0]: https://github.com/juice094/devbase/releases/tag/v0.19.0 -[0.18.0]: https://github.com/juice094/devbase/releases/tag/v0.18.0 -[0.1.0]: https://github.com/juice094/devbase/releases/tag/v0.1.0 +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added + +- **FTS5 技能全文搜索** (Schema v35) — `skills_fts` 虚拟表 + 触发器,`search_skills_text()` 使用 BM25 排序(name=1.0, desc=0.8, tags=0.4, category=0.2),LIKE 降级 fallback +- **可插拔外部技能源** (Schema v36) — `SkillSource` trait + `GitHubSource` / `LocalFileSource`,`sync_sources` / `sync_log` 审计表,`devkit_skill_sync` MCP 工具(Beta tier),`devbase skill import` CLI 子命令 +- **Vault 多根目录 + symlink 跟随** — `VaultConfig`(roots + follow_symlinks),多根目录扫描 `scan_vault_with_options`,`resolve_vault_write_path` 实体回溯路径解析,`devbase vault sync` CLI +- **Ontology 导入** — `devkit_ontology_import` MCP 工具(Beta tier),`devbase ontology` CLI(`--dry-run` 预览),支持 OpenClaw workspace `ontology/entities/*.json` + `ontology/relations/*.jsonl` 批量导入 +- MCP 工具数: 69 → **71**(5 stable + 62 beta + 4 experimental) +- `devkit_document_convert` — Experimental tier MCP tool,PDF/PPTX → Markdown 转换(`pdftotext` / `python-pptx` 流水线),含 frontmatter 质量标注 +- Stable 工具 invocation 测试补全:`devkit_query_repos`、`devkit_vault_search`、`devkit_vault_read`、`devkit_status`、`devkit_workflow_list`、`devkit_index` +- `seed_repo()` 轻量测试 helper(仅插入 `entities` 表,无副作用) + +### Fixed + +- `mcp/tools/document_convert.rs` 原始字符串定界符修复(`r###"` 避免与 Python f-string `"##` 冲突) +- `cleanup_extracted_text` 单元测试期望值与实现语义对齐(保留最多 2 个连续空行) + +### Changed + +- **Workspace crate 架构重组** — 消除机械提取造成的微 crate 碎片 + - 合并 8 个 `devbase-registry-*` 微 crate(100–300 行/个)为统一 `devbase-registry`,含 8 个语义子模块(`entity`, `health`, `metrics`, `relation`, 
`call_graph`, `code_symbols`, `dead_code`, `workspace`) + - 拆分 10+ 个 monolithic `lib.rs` 为域驱动子模块:`devbase-embedding` (`candle`/`ollama`), `devbase-workflow-model` (`definition`/`execution`/`step_type`), `devbase-symbol-links` (`similarity`/`co_located`), `devbase-sync-protocol` (`index`/`version_vector`), `devbase-skill-runtime-types` (`skill_type`/`execution`/`params`), `devbase-skill-runtime-parser` (`frontmatter`/`field_parsers`), `devbase-workflow-interpolate` (`resolver`), `devbase-vault-frontmatter` (`parser`), `devbase-vault-wikilink` (`parser`), `devbase-core-types` (`node_type`/`node`/`edge`) + - 全 workspace `Cargo.toml` 统一使用 `[workspace.package]` 继承(`version`, `edition`, `authors`, `license`, `repository`) +- `KNOWN_ISSUES.md` 更新:document_convert 从 P3 债务移至已解决归档;测试计数 485→494 +- `docs/reference/mcp-tools.md` 修正为 69 个工具,补充 Index / Workflow / Relation / KnownLimit / Session 分类 +- `docs/reference/stable-tools/README.md` 修正为 5 个 Stable 工具(删除过时的 `project_brief.md` / `hybrid_search.md` / `session_recall.md`) + +## [0.20.1] - 2026-05-17 + +### Added + +- **Phase 1 Production Hardening** + - Workflow E2E 测试 — `src/mcp/tools/workflow.rs`:DAG 成功执行、失败传播验证 + - RF-7 路径隐私脱敏 — `sanitize_path()` 自动掩码 home 目录为 `~` + - Tantivy 一致性修复 — `repair_tantivy_consistency_at()` 启动时自动检测 orphan/missing 文档 + - 性能回归基线 — `test_keyword_search_latency_regression_1k` / `_10k`(profile-aware 阈值) + - `TempStorageBackend` — 测试隔离后端,消除 `DEVBASE_DATA_DIR` 竞态 +- **Architecture Invariants CI 自动化** — `scripts/invariant-checks/run-checks.ps1` + - G5 (RF-6):diff-only 检测新增生产代码 `unwrap`/`expect`/`panic`(排除 `#[cfg(test)]`) + - T11:`mcp/tools` 禁止直接调用 `rusqlite::Connection` + - T12:`tui/render` 纯消费检查(禁止写入操作) + +### Fixed + +- `AppContext::with_storage()` 使用实际 storage backend 的 `index_path()` 而非硬编码默认值 +- G5 invariant checker 正则修复:`tests.rs` 文件正确跳过 +- `Cargo.lock` 同步版本 bump(修复 `--locked` release 构建失败) +- 平台相关测试隔离:`C:\` 路径断言加 `#[cfg(windows)]`,Linux `python3` 断言适配 +- HuggingFace 网络依赖测试加 `#[ignore]`(避免 CI TLS 
证书失败) + +## [0.20.0] - 2026-05-16 + +### Added + +- **知识完备性**:Vault 双向链接图遍历(BFS depth 1-3)+ `[[note#heading]]` block 引用 +- **Vault 笔记历史追踪** — Git-based blob diff,`devkit_vault_history` tool +- **混合检索质量监控** — `HybridSearchMetrics`(latency/recall/overlap/keyword_source) +- **性能回归基线** — Criterion benchmarks:`index_repo_full`、`cosine_similarity`、`extract_symbols` +- **客户端无关原则** — `StorageBackend` trait 完整实现,解耦 `dirs::data_local_dir()` 硬编码 +- **MCP Tools +4** (68 total) + - `devkit_vault_history`, `devkit_vault_export`, `devkit_vault_graph`, `devkit_vault_daily` + +### Changed + +- 20+ 独立 crate 零循环依赖,workspace 拆分完成 +- `entities` 表成为唯一真相源,`repos` 表彻底删除 +- Tantivy / SQLite 补偿扫描:启动时自动同步 orphan 文档 + +## [0.19.0] - 2026-05-14 + +### Added + +- **SQLite WAL 模式** — `r2d2` 连接池 + WAL journal,并发安全与增量备份 +- **Tantivy 健康评分** — `devkit_index_health`:损坏检测、自动重建、孤儿文档清理 +- **Vault 导出** — `devkit_vault_export`:Obsidian-compatible Markdown 批量导出 +- **Redis ADR 决策** — `docs/architecture/adr-003-redis.md`:评估后决定保持 SQLite 优先 +- **OpLog 审计追踪** — 结构化事件类型 `OplogEventType`,全操作不可变日志 + +### Changed + +- Schema 迁移前自动生成 `backup-YYYYMMDD-HHMMSS.db` 快照 +- 索引层反向一致性扫描与自动修复能力 + +## [0.18.0] - 2026-05-13 + +### Added + +- **ClaudeCode 工作流集成** — `docs/RFC/claudecode-workflow-integration.md` + - `devkit_project_brief` — 生成项目 Markdown 简报(架构 + 模块 + 近期提交 + 已知约束),用于 `.claude/CLAUDE.md` 注入 + - `devkit_impact_analysis` — 符号级变更影响半径分析(BFS 调用图遍历 + 相关符号发现 + 测试启发式 + 历史 oplog) + - `scripts/devbase-claude.ps1` — PowerShell 一键启动器:自动检测 repo → 生成简报 → 注入 `.claude/CLAUDE.md` → 启动 `claude` → 可选捕获退出 diff +- **Session 导入/导出工具** + - `devkit_session_export` — 导出会话为 Markdown / JSON;支持记忆类型图标与元数据 + - `devkit_session_import` — 从 bulk text 批量导入记忆(`[type]` 前缀解析) +- **MCP Tools +4** (64 total) + - `devkit_project_brief`, `devkit_impact_analysis`, `devkit_session_export`, `devkit_session_import` +- **TUI Session 视图硬化** + - 三态 MainView 切换:`RepoList → VaultList → Session`(`Tab` 键循环) + - Session 列表:状态图标(● active / ◌ archived)+ 高亮样式 + - Session 
详情:记忆类型图标(◆ decision / ▪ constraint / ★ discovery / ✗ error)+ embedding model 标签 + indexed 状态 +- **AGENTS.md** 同步至 v0.18.0-dev 基线(64 Tools / 437 tests) + +### Changed + +- `src/mcp/mod.rs` Tool 注册表扩展至 64 工具(稳定 + Beta) +- `src/mcp/tests.rs` 工具计数断言同步 +- TUI `render_session.rs` / `state/mod.rs` 适配 Schema v34 记忆字段(`embedding_model`, `indexed_at`) + +## [0.17.0] - 2026-05-13 + +### Added + +- **Agent Memory 向量存储** — Schema v34 + - `agent_memories` 新增 `embedding BLOB`, `embedding_model TEXT`, `indexed_at DATETIME` + - Partial index `idx_agent_memories_embedding` 仅索引含向量的行 + - `AgentMemory` 结构体扩展向量元数据字段 +- **SQLite UDF: `cosine_similarity`** — `src/registry/agent_context.rs` + - 输入: 两个 little-endian f32 BLOB + - 输出: REAL ∈ [-1.0, 1.0] + - 注册时机: `WorkspaceRegistry::init_db_at` 迁移完成后自动注册 +- **语义记忆搜索** — `search_memories_semantic(context_id, query_embedding, limit)` + - 纯 SQL `ORDER BY cosine_similarity(embedding, ?) DESC` + - 零 LLM 运行时依赖;仅执行向量比对 +- **MCP Tools +2** (60 total) + - `devkit_session_recall` — 外部向量查询 + 语义召回 top-k memories + - `devkit_session_index` — 为已有 memory 注入外部生成 embedding +- **Skill Runtime Auto-Recall** — `src/skill_runtime/executor.rs` + - Tier 1: Semantic recall (本地 Candle/Ollama 或外部 HTTP endpoint) + - Tier 2: Keyword fallback (`LIKE` search on `content`) + - 新环境变量: `DEVBASE_CONTEXT_MEMORY_COUNT`, `DEVBASE_CONTEXT_RECALL_METHOD` + - `DEVBASE_CONTEXT_MEMORIES` 升级为 top-k 相关 memories(含 `score` + `model`) +- **外部 Embedding Provider 集成** + - `call_external_embedding_endpoint` — `reqwest::blocking` POST `/api/embeddings` + - 配置驱动: `config.toml [embedding]` (enabled/provider/model/base_url/timeout) + - 端到端测试: mock TCP server 验证 Ollama 格式解析 + 错误码处理 +- **RFC 文档** — `docs/RFC/agent-memory-vector-storage.md` + - 架构决策: devbase = 向量数据库层,不做 embedding 生成 + - 参照 pgvector 边界设计 + +### Changed + +- **Feature Flags**: `embedding` 从 `default` 移除 + - Candle/Ollama 依赖变为 opt-in: `--features embedding` + - 默认构建零 ML 依赖,编译时间减少 30~50% +- `insert_memory` 签名扩展: 新增可选 `embedding: 
Option<&[f32]>` 和 `embedding_model: Option<&str>` +- `list_memories` / `search_memories` SELECT 语句扩展为 8 列(兼容新增字段) +- AGENTS.md 同步至 v0.17.0-dev 基线 + +### Breaking Changes + +- 默认构建不再包含 `devbase-embedding` crate;需要语义生成能力的用户须显式启用 `--features embedding` +- `generate_query_embedding` 在默认构建下返回错误(提示启用 feature 或配置外部 endpoint) + +## [0.16.1] - 2026-05-13 + +### Added + +- **Workflow-Session Binding** — Schema v33 + - `workflow_executions` 新增 `context_id` 列 + 索引 + - `create_execution` 自动绑定 `resolve_active_context()` + - MCP `devkit_workflow_run` 与 CLI `workflow run` 均支持自动绑定 + - `devkit_session_workflows` tool: 列出指定 context 的 workflow 执行历史 +- `context_entity_links` 表 (Schema v32): context 与任意 entity 的多对多关联 + +## [0.16.0] - 2026-05-13 + +### Added + +- **Agent Contexts (AI Agent OS)** — Schema v31 + - `agent_contexts` 表: 持久化 AI session / project scope + - `agent_memories` 表: 结构化记忆(decision/constraint/note/discovery/error) + - 9 个 Session MCP tools: save/list/resume/attach/detach/activate/search/capture/workflows + - `resolve_active_context()`: 环境变量 `DEVBASE_ACTIVE_CONTEXT` → 文件 `.active_context` fallback + - Context-aware Skill Runtime: 注入 `DEVBASE_ACTIVE_CONTEXT` + `DEVBASE_CONTEXT_MEMORIES` + `DEVBASE_CONTEXT_LINKS` + - 所有 agent_context 操作自动写入 OpLog (`OplogEventType::AgentContext`) + +## [0.15.0] - 2026-05-04 + +### Added + +- **P1: Tantivy BM25 代码符号搜索** — `search/symbol_index.rs` + - 独立 Schema (`repo_id`, `name`, `signature`, `file_path`, `line_start`) + - `keyword_search_symbols` 主路径走 Tantivy BM25,SQLite LIKE 回退 + - 索引流程 `index.rs` 自动同步写入 symbol_index + - `StorageBackend` 扩展 `symbol_index_path()`(6 实现) +- **P3: Embedding 多后端** — Candle (默认) + Ollama (配置切换) + - 新增 `OllamaProvider` (`ureq` HTTP `/api/embed`) + - `create_provider(backend, model, base_url, timeout)` 配置化创建 + - `generate_query_embedding` 通过 `OnceLock` 懒加载配置化 provider + - 默认模型改为 `all-minilm` (384-dim,与 Candle 维度兼容) +- **P4: Health 环境检测扩展** — `EnvVersionCache` 从 5 工具 → 9 工具 + - 新增: `python`, `bun`, `zig`, `java` + 
- `get_tool_version` 支持 stderr fallback (Java 输出到 stderr) + - `fmt_version` 改进: Java 引号提取、Docker/Python 格式处理 +- **P5: 架构不变量自动化 CI** — `scripts/invariant-checks/run-checks.ps1` + - G5: diff-only 检测新增生产代码 unwrap/expect/panic(排除 `#[cfg(test)]`) + - T11: 检测 `mcp/tools/*` 直接调用 `rusqlite::Connection` + - T12: 检测 `tui/render/*` 写入操作 + - CI job `invariant-check` 加入 `.github/workflows/ci.yml` +- **P2 Phase 1: AppContext 职责拆分** — 6 个 Client trait impl 迁出 `storage.rs` + - `scan.rs` / `health.rs` / `sync.rs` / `digest.rs` / `knowledge_engine/mod.rs` / `registry.rs` + - `storage.rs` 860 → 430 行 (-50%) + - 删除冗余 `conn_mut()` +- **P2 Phase 2: 内联 SQL 下沉** — 新增 `registry/code_symbols.rs` + `registry/dead_code.rs` + - `CodeSymbolRow` / `DeadCodeRow` + 纯函数查询 (12 个单元测试) + - `RegistryClient` 退化为纯代理层 + +### Changed + +- `EmbeddingConfig` 默认模型 `nomic-embed-text` → `all-minilm` (384-dim) +- AGENTS.md 阶段描述更新: v0.14.3 → v0.15.0 推进中 → v0.15.0 全部完成 + +### Fixed + +- **TTL 缓存负值 bug** (`97172ec`): `elapsed < ttl_seconds` → `elapsed >= 0 && elapsed < ttl_seconds` + - 防止系统时间回溯导致缓存永不过期 +- `crates/devbase-embedding/src/lib.rs` 遗留 unwrap 清零 (`encode_with_candle` → `ok_or`) + +## [0.14.3] - 2026-05-05 + +### Added + +- **Schema v30** — `code_symbols.attributes` 列,tree-sitter 提取 `#[test]`/`#[tokio::test]` 等属性 + - `devkit_dead_code` 自动过滤测试函数,消除假阳性 + - `rust_node_to_symbol` 支持 `prev_sibling()` 回溯收集属性节点 +- **Tantivy/SQLite 补偿扫描** — 启动时自动检测并清理 orphan 文档 + - 新增 `search::sync_index_to_db(conn)`,对比 Tantivy `list_indexed_repo_ids()` 与 SQLite `entities` + - `AppContext` 初始化后自动调用,失败仅 warn 不阻塞启动 +- **Feature flags** — `mcp` + `embedding`,支持 `--no-default-features` 最小化编译 + - `default = ["tui", "mcp", "embedding"]` + - `devbase-embedding` 设为 `optional = true` + - 新增 `src/clients.rs` 提取 MCP client traits,避免 mcp feature 关闭时 trait 不可用 +- **Kimi CLI MCP 集成文档** — AGENTS.md 新增 Kimi CLI 集成状态,项目级 skill 位于 `.kimi/skills/devbase-project/` + +### Changed + +- **RF-1 架构红线** — `init_db()` 全局路径残留清零 + - `init_db()` 标记 
`#[deprecated]`,新增 `init_db_with(backend: &dyn StorageBackend)` + - `workflow/executor.rs`、`workflow/state.rs`、`storage.rs` 全部改为注入式 + - `examples/` + `benches/` 中额外 5 处残留同步修复 +- `index_repo_full` 合并用户 `scan.exclude_patterns` 与默认排除模式 +- `cargo fmt` + `cargo clippy --fix` 全量格式化(8 文件,6 处 warning 修复) +- `CONTRIBUTING.md` 新增 sccache 构建加速指南 + +### Fixed + +- `cargo clippy --all-targets -D warnings` — 7 warnings → 0 +- `cargo fmt --check` — 全量通过 + +## [0.14.2] - 2026-05-02 + +### Changed + +- health dirty 检测修复(排除 ignored 文件) +- scan 路径规范化 + syncthing-rust 识别修复 +- experiment_log / CodeMetrics / ModuleGraph / CallGraph / DeadCode 提升为 Beta tier +- 48 tools: Stable 5 / Beta 40 / Experimental 3 + +## [0.14.1] - 2026-05-01 + +### Added + +- CLI JSON 输出补全 (`--json` / `--recalc`) +- relations MCP 工具加固 +- License headers 全量补录 +- Vault Daily / Vault Graph MCP tools + +## [0.14.0] - 2026-04-28 + +### Added + +- Workspace 拆分:6 个零耦合 crate 提取 +- MCP trait 化:`mcp/tools/repo.rs` `crate::` 引用 68→41 + +## [0.13.0] - 2026-04-26 + +### Added + +- Registry God Object 拆解:10 子模块提取为 free function +- `WorkspaceRegistry` 退化为纯 facade + +## [0.12.0] - 2026-04-30 + +### Added + +- **Schema v22** — drop `vault_notes`, `papers`, `workflows` orphan tables; `entities` becomes sole source of truth for all entity types +- **Managed-Gate Fail-Safe Defaults** — `devbase sync` defaults to managed repos only + - Management tags: `mirror`, `reference`, `third-party`, `collaborative`, `team`, `own-project`, `tool`, `active`, `managed` + - Untagged / non-management repos are registered but skipped by default sync + - `--filter-tags` bypasses the gate for explicit selection +- **`.devbase-ignore`** — directory-level opt-out exclusion during scan +- `scan --register` no longer auto-tags repos with `"discovered"` +- i18n hint for unmanaged repos + +### Changed + +- `inspect_repo`: remove `"discovered"` from default tags; `-main`/`-master` repos keep `zip-snapshot` + `needs-migration` +- `collect_tasks`: default mode 
filters by management tags +- All `list_workflows` / `list_papers` / `list_vault_notes` queries migrated to `entities` table + `json_extract` +- Generic `upsert_entity` abstraction for entity dual-write +- `ENTITY_TYPE_*` constants extracted across 10 files (~25 replacements) +- `cargo test --lib`: 374 → 379 passed + +### Breaking Changes + +- Existing repos tagged `"discovered"` are **no longer synced by default**. + Use `devbase tag managed` (or any management tag) to opt a repo into automatic sync. + +## [0.10.0] - 2026-04-26 + +### Added + +- **L3 Risk Layer MVP** — `known_limits` 表 + Registry CRUD + MCP tools + CLI subcommand + - Schema v18: `known_limits` 表(id, category, description, source, severity, first_seen_at, last_checked_at, mitigated) + - Registry CRUD: `save`/`get`/`list`/`delete`/`resolve`/`seed_hard_vetoes` + - MCP tools: `devkit_known_limit_store` / `devkit_known_limit_list`(Beta tier) + - CLI: `devbase limit {add,list,resolve,delete,seed}` + - OpLog 集成: create/update/resolve/delete/seed 自动写入 oplog(event_type = `KnownLimit`) + - Hard Veto 种子: AGENTS.md 中的 5 条硬约束自动填充 +- **L4 元认知层 MVP** — `knowledge_meta` 表 + L3-L4 联动 + - Schema v19: `knowledge_meta` 表(id, target_level, target_id, correction_type, correction_json, confidence, created_at) + - Registry CRUD: `save`/`get`/`list`/`delete` + - CLI 联动: `devbase limit resolve --reason "..."` 自动创建 L4 meta 记录 +- **Hard Veto 运行时守卫** — Skill 执行前自动检查未解决 hard veto + - `skill_runtime::executor::run_skill` 执行前查询 `known_limits` + - 未解决 hard veto 存在时,警告注入 `stderr`,同时写入 OpLog + - 零破坏性:skill 仍执行成功,但输出中包含 `[HARD-VETO-WARNING]` + +### Changed + +- `cargo test --all-targets`: 279 → 288 passed +- MCP tool 总数: 35 → 37 + +## [0.11.3] - 2026-04-26 + +### Changed + +- **Phase 1 主从表切换 — Stage 3 完成**(`repos` 表删除) + - `save_repo` / `update_repo_*` / `run_clean` 不再写入 `repos` + - Schema v21 迁移:重建 11 个子表(去 FK)→ 删除 `repos` 表 + - `test_helpers.rs` SCHEMA_DDL 同步去 `repos` + 去 FK + - `entities` 成为真正的读写唯一数据源 + +## [0.11.2] - 2026-04-26 
+ +### Changed + +- **Phase 1 主从表切换 — Stage 2 完成**(读路径迁移) + - `list_repos` / `list_repos_stale_health` / `list_repos_need_index` / `list_workspaces_by_tier` 全部改为从 `entities` 读取(`json_extract`) + - `digest.rs` / `health.rs` / `daemon.rs` / `backup.rs` / `knowledge_engine.rs` / `sync/*.rs` / `tui/state.rs` / `mcp/tools/repo.rs` 等所有 `list_repos()` 调用方自动迁移 + - 直接 SQL 查询迁移:`dependency_graph.rs`, `registry/links.rs`, `registry/knowledge.rs`, `query.rs`, `oplog_analytics.rs`, `commands/simple.rs` + - `update_entity_metadata_field` 修复 `json_set` 字符串引号问题:原始字符串直接传递,`"null"` 时自动 `json_remove` + - `repo_tags` / `repo_remotes` 子表保留,通过 `repo_id` JOIN 读取(FK 仍指向 `repos`) + +## [0.11.1] - 2026-04-26 + +### Changed + +- **Phase 1 主从表切换 — Stage 0 完成**(entities 第一公民前置) + - Schema v20: Flat ID 命名空间迁移(`repo:devbase` → `devbase`,`skill:xxx` → `xxx`) + - `sync_repo_to_entities_by_id` 重构为 `upsert_entity_for_repo`:直接由 `RepoEntry` 写入 entities,不再读取 repos + - `update_repo_*` 改为先写 entities metadata(`json_set`),再写 repos + - `save_repo` 写入顺序反转:entities → repos → repo_tags → repo_remotes + - `run_tag` 补全 entities 双写:`sync_repo_tags_to_entity` + - `run_clean` 改为先删 entities,再删 repos(保留 CASCADE 行为) + - Skill entities 同步同理去除 `skill:` 前缀 + +## [0.11.0] - 2026-04-26 + +### Added + +- **AppContext Pool 化** — 全链路数据库连接池统一 + - `AppContext` 持 `r2d2::Pool`,替代单 `Connection` + - `scan`/`health`/`sync`/`backup`/`daemon`/`query` 等深层模块全部迁移 + - `init_db()` 调用点从 89 处降至 5 处合法保留(Pool 前 schema 引导 ×2、migrate 定义 ×1、workflow 测试辅助 ×2) + - 根治 `spawn_blocking` / `thread::spawn` 闭包无法传递裸 `Connection` 的问题 +- **MCP 测试隔离** — 全部 MCP 集成测试改用临时目录 + - `DEVBASE_DATA_DIR` 指向 `tempfile::TempDir` + `AppContext::with_defaults()` + - 多线程并发测试全部通过,无 flaky +- **Search 测试竞态自愈** — `SEARCH_TEST_LOCK` + 临时目录隔离,多线程 (`--test-threads=4`) 稳定通过 + +### Changed + +- `cargo test --all-targets`: 288 → 374 passed(+86 个新增/迁移测试) +- CI 测试并行度: `--test-threads=1` → `--test-threads=4`,回归测试耗时 ~13s → ~4s +- `rusqlite` 0.34 + `r2d2_sqlite` 0.27.0 版本锁定 + +## [0.9.0] 
- 2026-04-26 + +### Added + +- **Workflow Loop Step 完整执行** — 5 种 step 类型全部可执行 + - `StepType::Loop { for_each, body }`:遍历集合,执行 body 子步骤 + - 变量插值:`${loop.item}` / `${loop.index}` + - 结果聚合:stdout 按迭代索引标记,outputs 合并 + - 失败处理:单迭代失败按 body step 的 `on_error` 策略处理 +- **12 个新增单元测试** — model/interpolate/validator/executor 全覆盖 + +### Changed + +- `cargo test --all-targets`:267 → 279 passed + +## [0.8.0] - 2026-04-25 + +### Added + +- **Workflow 子类型执行** — Subworkflow / Parallel / Condition 全部可执行 + - `execute_subworkflow_step`:递归调用 `execute_workflow` + - `execute_parallel_step`:子步骤串行执行 + 结果聚合 + - `execute_condition_step`:字符串插值后 true/false 评估 +- **NLQ 自然语言查询结果可执行** — TUI `[:]` 搜索结果按 Enter 直接运行 skill +- **NLQ smoke test** — `run_nlp_selected_skill` 空列表/无技能/执行管道测试 +- **TUI SkillPanel 拆分** — `SkillPanelState` 提取 7 个字段,App 51→44 字段 + +### Fixed + +- 29 个生产代码 unwrap 全部清零 +- 30 个 clippy 警告清零 + +## [0.7.0] - 2026-04-20 + +### Added + +- **NLQ 自然语言查询** — TUI `[:]` 触发 embedding 语义搜索,fallback 降级文本搜索 +- **智能同步建议** — `sync/policy.rs::recommend_sync_action` 基于 safety/ahead/behind 生成建议 + +## [0.6.0] - 2026-04-18 + +### Added + +- **Mind Market 评分系统** — `skill_runtime::scoring` + - `success_rate` + `usage_count` + `rating`(0-5 分公式) + - CLI:`skill recalc-scores` / `skill top` / `skill recommend` +- **TUI Workflow 执行** — `[w]` 详情页 `r/Enter` 运行 + 结果弹窗 + +## [0.5.0] - 2026-04-17 + +### Added + +- **Workflow Engine v0.5.0** — YAML 编排多步骤自动化 + - 5 种 step 类型:skill / subworkflow / parallel / condition / loop + - 拓扑调度(Kahn 算法)+ batch 并行执行 + - 变量插值:`${inputs.x}` / `${steps.y.outputs.z}` + - 错误策略:Fail / Continue / Retry / Fallback + - Schema v17:`workflows` + `workflow_executions` 表 +- **CLI/TUI Workflow 集成** — `devbase workflow {list,show,register,run,delete}` + `[w]` 面板 + +## [0.4.0] - 2026-04-15 + +### Added + +- **Schema v16 统一实体模型** — `entity_types` + `entities` + `relations` 表,渐进双写 +- **Skill 自动封装** — `devbase skill discover <path>` 自动分析项目 CLI/API,生成 SKILL.md +- **Git URL Discover** — `devbase skill 
discover https://github.com/...` 克隆+分析+注册 +- **MCP `devkit_skill_discover`** — 35 tools 总数 + +## [0.3.0] - 2026-04-12 + +### Added + +- **34 MCP tools 全量通过 MCP Inspector** +- **README Quick Start 三步内跑通** +- **CI/CD** — `.github/workflows/ci.yml`(check / test / fmt / clippy on Windows) +- **GitHub Release 预编译二进制** + +## [0.2.4] - 2026-04-20 + +### Architecture + +- **Outboard Brain Embedding Architecture** — Embedding generation moved to external Skill/MCP Server + - `embedding.rs` stripped of Ollama/OpenAI generation logic; storage protocol only (`embedding_to_bytes`, `bytes_to_embedding`, `cosine_similarity`) + - `knowledge_engine.rs` no longer generates embeddings during indexing + - Aligns with "store + search in devbase, compute in Clarity/Skill" boundary + +### Changed + +- **Breaking** — `devkit_semantic_search` now accepts `query_embedding: number[]` instead of `query: string` + - Embedding generation is the caller's responsibility (external MCP Server or Skill) + - Removed `config.embedding.enabled` gate; search works as long as embeddings exist in DB + +### Added + +- **`devkit_embedding_store`** — Store externally-generated embedding vectors into SQLite + - Parameters: `repo_id`, `symbol_name`, `embedding: number[]` + - Upsert semantics (ON CONFLICT UPDATE) +- **`devkit_embedding_search`** — Alias for `devkit_semantic_search` with vector-based interface + - Same parameters and behavior, alternative name for workflow clarity +- **MCP tool count**: 25 → 31 + +## [0.2.4] - 2026-04-20 (continued) + +### Added + +- **`devkit_hybrid_search`** — Hybrid vector + keyword search via RRF merge (Beta) + - `search::hybrid.rs`: `rrf_merge()` (Reciprocal Rank Fusion, k=60), `keyword_search_symbols()` (SQLite LIKE on name/signature), `hybrid_search_symbols()` (auto-fallback to keyword when embedding missing) + - `registry::knowledge::hybrid_search_symbols()` wrapper + - Recommended default search tool for code concept discovery +- **`devkit_cross_repo_search`** — 
Cross-repository symbol search filtered by tags (Beta) + - `registry::knowledge::cross_repo_search_symbols()`: INTERSECT-based tag filtering (AND semantics), per-repo hybrid search, global dedup+sort + - Searches all repos matching ALL specified tags +- **`devkit_knowledge_report`** — Workspace knowledge coverage report (Beta) + - `src/oplog_analytics.rs`: `generate_report()` with table-existence guards for resilient querying + - Reports: repo_count, total_symbols, total_embeddings, total_calls, coverage_pct, per-repo breakdown, health_summary, recent_activity +- **`devkit_related_symbols`** — Explicit symbol-to-symbol knowledge links (Experimental) + - Schema v13: `code_symbol_links` table (source_repo, source_symbol, target_repo, target_symbol, link_type, strength) + - `src/symbol_links.rs`: `compute_similar_signature_links()` (Jaccard token overlap), `compute_co_located_links()` (same-file clustering) + - `generate_and_save_links()`: persists links with ON CONFLICT IGNORE upsert +- **External Embedding Provider** — Reference Python implementation in `examples/embedding-provider/` + - `index.py`: Ollama `/api/embeddings` client, batch generation, cross-platform registry DB path + - Byte-compatible f32 little-endian serialization via `struct.pack` + - CLI: `--repo-id`, `--model`, `--ollama-url`, `--batch-size`, `--force` +- **Schema v13** — `code_symbol_links` table for explicit conceptual relationships + +### Engineering + +- **Context Safety Mechanism** — Formalized as long-term architecture principle + - Sub-agent execution: serial + commit-isolated work directories (prevents compilation races) + - MCP tool idempotency: all state-mutating tools use ON CONFLICT UPDATE / transaction boundaries + - OpLog as immutable audit trail for all state transitions + +--- + +## [0.2.3] - 2026-04-20 + +### Added + +- **Semantic Vector Search (Wave 1)** — Cosine-similarity code symbol search + - `code_embeddings` table (Schema v11): `repo_id + symbol_name` PK, BLOB embedding, 
`generated_at` + - `embedding.rs`: Ollama/OpenAI-compatible generation + `cosine_similarity` + byte serialization + - `devkit_semantic_search` MCP tool (Beta): natural-language → embedding → top-K symbols +- **Multi-Language Symbol Extraction (Wave 2)** — tree-sitter AST parsing beyond Rust + - `tree-sitter-python`, `tree-sitter-typescript`, `tree-sitter-go` dependencies + - `SymbolType` expanded: Function, Struct, Enum, Trait, Impl, Module, Class, Interface, TypeAlias, Constant, Static + - Per-language call-target resolvers for Call Graph construction + - Languages supported: Rust, Python, JavaScript, TypeScript, Go +- **Call Graph Analysis** — Intra-repo function call relationship extraction + - `code_call_graph` table (Schema v10): caller → callee edges with line numbers + - `devkit_call_graph` MCP tool: "Who calls `register_tool`?" +- **Cross-Repo Dependency Graph expansion** + - `CMakeLists.txt` parsing: `find_package`, `add_subdirectory`, `FetchContent_Declare`, `target_link_libraries` + - `ManifestKind::CMake` added to dependency graph builder +- **Dead Code Detection** — `devkit_dead_code` MCP tool (Experimental) + - SQL `NOT EXISTS` query over call graph to find functions with zero incoming edges + - `LIKE 'pub%fn%'` heuristic to exclude non-public functions +- **arXiv Integration** — Pure string-parsing Atom XML fetcher (zero heavy XML deps) + - `arxiv.rs`: `PaperMetadata` with title/authors/summary/published/category + - `devkit_arxiv_fetch` MCP tool (Beta): fetch by arXiv ID +- **Performance Benchmarks** — Criterion suite (`benches/semantic_index.rs`) + - `index_repo_full` (small/medium/full parameterization) + - `cosine_similarity` (128/512/768 dims) + - `extract_symbols` (Rust/Python/Go comparison) + - `parse_cmake_lists` (CMake parsing) +- **Structured OpLog (Schema v12)** — Typed event system + - `OplogEventType` enum replacing free-text `operation` field + - JSON metadata + `duration_ms` for observability + - Migration: `CASE` mapping from legacy 
strings to enum variants + +### Fixed + +- **`scan` async panic** — `fetch_github_stars` now runs in `std::thread::spawn` isolation + - Prevents `reqwest::blocking::Client` drop inside tokio runtime from causing panic + - `block_on_async()` helper detects runtime context and uses `mpsc` or temporary runtime +- **Dead code false positives** — `pub fn` → `pub%fn%` SQL LIKE match covers `pub async fn` / `pub(crate) fn` / `pub unsafe fn` + - Excludes `main()` from dead code results +- **Clippy warnings** — 12+ lints resolved (`manual_strip`, `collapsible_if`, `FromStr`, `type_complexity`, `useless_format`, etc.) + +### Changed + +- **`nl_filter_repos`** — Now uses Tantivy full-text search as primary path + - Falls back to structured SQL filtering when Tantivy is unavailable + +--- + +## [0.2.2] - 2026-04-21 + +### Added + +- **Vault Backlinks** — Find notes that link to a given note + - `vault::backlinks:` query prefix + - TUI detail panel shows "被引用" section with backlink count and list + - MCP tool `devkit_vault_backlinks` — AI can discover note relationships + - `vault/backlinks.rs` with `build_backlink_index()` and `get_backlinks()` + +### Changed + +- **Schema v8** — `vault_notes` table no longer has `content` column + - Migration: auto-creates `vault_notes_v2`, migrates data, drops old table + - `save_vault_note` / `list_vault_notes` SQL updated to 8 columns + - Filesystem-first architecture now complete at the database level + +## [0.2.1] - 2026-04-20 + +### Added + +- **Vault Watch** — Filesystem watcher for `workspace/vault/` + - Auto-refresh TUI vault list when notes are edited externally + - 500ms debounce to avoid excessive reloads +- **Vault Tantivy Search** — `vault:` queries now use Tantivy full-text index + - Replaces slow SQLite LIKE + per-file reading + - Supports keyword scoring and ranking +- **MCP Registry Manifest** — `server.json` for official MCP Registry submission + +### Changed + +- `query.rs` vault branch: uses `search_vault()` instead of 
in-memory filtering + +## [0.2.0] - 2026-04-20 + +### Added + +- **Vault System** — Markdown note management with Obsidian-compatible PARA structure + - `vault/` directory with PARA folders: 00-Inbox, 01-Projects, 02-Areas, 03-Resources, 04-Archives, 99-Meta + - Filesystem-first architecture: note content lives in `.md` files, SQLite only indexes metadata + - YAML frontmatter parsing (title, tags, aliases, date) + - WikiLink `[[...]]` extraction and backlink index building +- **TUI Vault View** — Press `Tab` to switch between Repo list and Vault note list + - Vault list shows note titles with tag indicators + - Detail panel previews note content (first 20 lines), tags, and outgoing links + - `Enter` opens selected note in VS Code +- **MCP Vault Tools** — 3 new tools for AI Agent vault interaction + - `devkit_vault_search` — full-text search across vault notes + - `devkit_vault_read` — read note content and frontmatter by path + - `devkit_vault_write` — write or append to vault notes +- **P2-lite: repos.toml** — Optional static configuration override for repositories + - Declare tags, tier, and workspace_type in `workspace/repos.toml` + - Overrides are applied on top of auto-discovered repo metadata +- **Unified Node Model** — `core::node::{Node, NodeType, Edge}` abstraction + - `NodeType::GitRepo | VaultNote | Asset | ExternalLink` + - Foundation for future Knowledge Graph unification +- **Workspace Directory** — `%LOCALAPPDATA%/devbase/workspace/` with `vault/` and `assets/` +- **MCP Client Config** — `mcp.json` for Claude Desktop / Cursor integration + +### Changed + +- **Architecture principle**: File system = source of truth; SQLite/Tantivy = derived index/cache +- Vault notes no longer store `content` in SQLite (read from disk on demand) + +## [0.1.0] - 2026-04-20 + +### Added + +- **TUI Dashboard** — Terminal UI for multi-repository workspace management + - Repository list with status icons, stars, and tag indicators + - Detail panel with Overview / Health / 
Insights tabs + - Stars Trend sparkline (30-day history) + - Help Overlay with categorized keyboard shortcuts + - Responsive layout: compact / standard / wide screen modes + - Cross-repository code search (ripgrep + Tantivy dual mode) + - One-key launch into gitui / lazygit +- **MCP Server** — 14 tools for AI Agent integration (stdio transport) + - `devkit_scan`, `devkit_health`, `devkit_sync`, `devkit_query_repos` + - `devkit_code_metrics`, `devkit_module_graph`, `devkit_natural_language_query` + - `devkit_index`, `devkit_query`, `devkit_note`, `devkit_digest` + - `devkit_github_info`, `devkit_paper_index`, `devkit_experiment_log` +- **Safe Sync Engine** — Four-tier sync policies: Mirror / Conservative / Rebase / Merge + - Pre-sync safety assessment (dirty, diverged, detached HEAD detection) + - Dry-run preview with per-repo recommendations + - Async batch sync with concurrency control and timeout +- **Registry & Indexing** — SQLite-backed workspace registry + - Automatic Git + non-Git workspace discovery + - Schema migrations with automatic backup snapshots + - GitHub Stars cache with TTL and historical tracking + - Tantivy full-text index for repository knowledge search +- **Health Monitoring** — Workspace-wide health checks + - Git status tracking (dirty / ahead / behind / diverged) + - Blake3 hash snapshots for non-Git workspaces + - Environment tool version detection +- **i18n** — Chinese and English bilingual support +- **CI/CD** — GitHub Actions workflow for check, test, fmt, clippy on Windows + +### Engineering + +- Modular architecture: 22 crate modules with clear separation of concerns +- Dual lib+bin mode: `lib.rs` exports all modules for programmatic use +- Theme system with semantic color tokens (dark/light ready) +- Render layer split from monolithic 1026-line file into 6 focused submodules + +### Security + +- `cargo audit` clean (0 vulnerabilities in direct dependencies) + +[0.20.1]: https://github.com/juice094/devbase/releases/tag/v0.20.1 
+[0.20.0]: https://github.com/juice094/devbase/releases/tag/v0.20.0 +[0.19.0]: https://github.com/juice094/devbase/releases/tag/v0.19.0 +[0.18.0]: https://github.com/juice094/devbase/releases/tag/v0.18.0 +[0.1.0]: https://github.com/juice094/devbase/releases/tag/v0.1.0 diff --git a/README.md b/README.md index d9d0d06..1db3877 100644 --- a/README.md +++ b/README.md @@ -1,146 +1,146 @@ -
- -# 🗄️ devbase - -> **开发者工作空间的世界模型编译器** - -一套引擎,统一代码上下文、知识记忆与智能体推理。 - -[![Version](https://img.shields.io/badge/version-v0.20.1-blue)](https://github.com/juice094/devbase/releases) -[![Tests](https://img.shields.io/badge/tests-494%2B%20passed-brightgreen)](https://github.com/juice094/devbase/actions) -[![Clippy](https://img.shields.io/badge/clippy-0%20warnings-green)](https://github.com/juice094/devbase/actions) -[![License](https://img.shields.io/badge/license-AGPL--3.0%20%2F%20Commercial-orange)](LICENSE) -[![Rust](https://img.shields.io/badge/rust-1.95%2B-9cf)](https://www.rust-lang.org) -[![Glama](https://glama.ai/mcp/servers/juice094/devbase/badges/score.svg)](https://glama.ai/mcp/servers/juice094/devbase) - -
- ---- - -## 📋 简介 - -devbase 将代码库、笔记与工作流编译为 AI 可推理的结构化情境 — 不是存储数据,是构建环境的心智模型。 - -| 你是谁 | devbase 为你做什么 | -|:---|:---| -| **人类开发者** | `devbase tui` — 终端仪表盘,一眼看清 N 个仓库的 Git 状态,按 `s` 批量安全同步 | -| **AI 智能体** | 70 个 MCP 工具:通过 `devkit_skill_run` 发现、执行、编排 Skill — 不再重复造轮子 | -| **项目维护者** | `devbase skill discover .` — 一键将项目封装为 Skill,让 AI 用户能够发现和调用 | - ---- - -## 🌟 核心亮点 - -| 亮点 | 说明 | -|:---|:---| -| 📊 **TUI 仪表盘** | ratatui 终端界面:跨仓库搜索、安全同步、Skill/Workflow 发现 | -| 🔌 **70 个 MCP 工具** | stdio 本地进程通信:仓库管理、代码分析、知识图谱、智能体记忆 | -| 🏠 **本地优先** | 零数据离开本机 — SQLite + Tantivy + tree-sitter,无需云端 | -| 🔍 **混合检索** | BM25 全文 + FTS5 技能搜索 + 纯 SQL 向量搜索(`cosine_similarity` UDF),零 ML 运行时依赖 | - -> [完整 70 个 Tool 矩阵 → docs/guides/mcp-integration.md](docs/guides/mcp-integration.md) - ---- - -## 🔧 技术栈 - -| 组件 | 技术 | -|:---|:---| -| 终端 UI | ratatui | -| 全文检索 | Tantivy (BM25) | -| 语义检索 | SQLite BLOB + `cosine_similarity` UDF | -| 代码解析 | tree-sitter (Rust/Python/TS/Go) | -| 关系存储 | SQLite (WAL 模式, OpLog 审计) | -| 协议 | Model Context Protocol (stdio) | - ---- - -## 📁 项目结构 - -``` -devbase/ -├── src/ -│ ├── main.rs # CLI 入口:命令解析与分发 -│ ├── tui/ # 终端仪表盘(ratatui) -│ │ # 多仓库导航、跨仓库搜索、安全同步预览 -│ ├── mcp/ # MCP Server(69 个工具,stdio 通信) -│ │ # 人类与 AI 的统一接口层 -│ ├── registry/ # 仓库注册表:Git 状态、健康检查、批量同步 -│ ├── index/ # Tantivy 全文索引 + SQLite 向量索引 -│ │ # 混合检索核心,BM25 + cosine 向量评分 -│ ├── vault/ # PARA 笔记系统:双向链接、BFS 图遍历 -│ ├── skill/ # Skill 生命周期:发现 → 安装 → 执行 → 评分 → 发布 -│ │ # 自动封装项目为 AI 可调用的 Skill -│ ├── workflow/ # YAML 编排引擎:5 种 step 类型,拓扑调度 + 并行执行 -│ └── session/ # 智能体会话生命周期 + 向量记忆持久化 -├── docs/ -│ ├── architecture/ # 架构文档总览 -│ └── guides/ # 集成指南(Claude Code / 5ire / Kimi CLI) -├── scripts/ -│ ├── install.ps1 # Windows 一键安装 -│ ├── install.sh # Linux/macOS 一键安装 -│ └── devbase-claude.ps1 # Claude Code 一键启动器 -└── README.md -``` - -### 核心设计 - -**三层架构**: -1. **交互层** — TUI 仪表盘 + MCP Server + Workflow 引擎(人类与 AI 的接口) -2. **编译层** — 感知(tree-sitter/Tantivy/Git)→ 知识(图谱/向量/关系)→ 策略(同步/工作流/健康守卫) -3. 
**可靠层** — SQLite WAL 并发安全 + 索引健康检测 + OpLog 全操作审计 - -> 可靠性红线:所有 Registry 写入必须留下不可变审计痕迹(OpLog);Schema 迁移前自动生成快照。详见 [docs/architecture/overview.md](docs/architecture/overview.md)。 - ---- - -## 🚀 快速开始 - -```powershell -# Windows 一行安装 -irm https://raw.githubusercontent.com/juice094/devbase/main/scripts/install.ps1 | iex - -# 或下载预编译二进制(~8.7 MB) -# https://github.com/juice094/devbase/releases/tag/v0.20.1 -``` - -```bash -# Linux / macOS -curl -fsSL https://raw.githubusercontent.com/juice094/devbase/main/scripts/install.sh | bash - -# 基础工作流 -devbase scan . --register # 1. 扫描并注册工作区 -devbase tui # 2. 打开仪表盘 -devbase mcp # 3. 启动 MCP 服务端(供 AI 调用) -``` - -**AI 助手配置** — 添加到 `claude_desktop_config.json` 或 `~/.kimi/mcp.json`: -```json -{ "mcpServers": { "devbase": { "command": "devbase", "args": ["mcp"] } } } -``` - ---- - -## 🤝 参与贡献 - -详见 [CONTRIBUTING.md](CONTRIBUTING.md) — 添加 MCP 工具、Skill Schema、构建模式说明。快速验证: - -```bash -cargo build --release -cargo test --all-targets -cargo clippy --all-targets -D warnings -``` - ---- - -## 📄 许可证 - -双许可证:[AGPL-3.0+](LICENSE) 开源 / [商业授权](LICENSE-COMMERCIAL.md) 闭源使用。联系:`juice094@protonmail.com`。 - ---- - -
- -**[⭐ Star](https://github.com/juice094/devbase) · [🐛 Issues](https://github.com/juice094/devbase/issues) · [🤝 Contribute](CONTRIBUTING.md)** - -
+
+ +# 🗄️ devbase + +> **开发者工作空间的世界模型编译器** + +一套引擎,统一代码上下文、知识记忆与智能体推理。 + +[![Version](https://img.shields.io/badge/version-v0.20.1-blue)](https://github.com/juice094/devbase/releases) +[![Tests](https://img.shields.io/badge/tests-494%2B%20passed-brightgreen)](https://github.com/juice094/devbase/actions) +[![Clippy](https://img.shields.io/badge/clippy-0%20warnings-green)](https://github.com/juice094/devbase/actions) +[![License](https://img.shields.io/badge/license-AGPL--3.0%20%2F%20Commercial-orange)](LICENSE) +[![Rust](https://img.shields.io/badge/rust-1.95%2B-9cf)](https://www.rust-lang.org) +[![Glama](https://glama.ai/mcp/servers/juice094/devbase/badges/score.svg)](https://glama.ai/mcp/servers/juice094/devbase) + +
+ +--- + +## 📋 简介 + +devbase 将代码库、笔记与工作流编译为 AI 可推理的结构化情境 — 不是存储数据,是构建环境的心智模型。 + +| 你是谁 | devbase 为你做什么 | +|:---|:---| +| **人类开发者** | `devbase tui` — 终端仪表盘,一眼看清 N 个仓库的 Git 状态,按 `s` 批量安全同步 | +| **AI 智能体** | 71 个 MCP 工具:通过 `devkit_skill_run` 发现、执行、编排 Skill — 不再重复造轮子 | +| **项目维护者** | `devbase skill discover .` — 一键将项目封装为 Skill,让 AI 用户能够发现和调用 | + +--- + +## 🌟 核心亮点 + +| 亮点 | 说明 | +|:---|:---| +| 📊 **TUI 仪表盘** | ratatui 终端界面:跨仓库搜索、安全同步、Skill/Workflow 发现 | +| 🔌 **71 个 MCP 工具** | stdio 本地进程通信:仓库管理、代码分析、知识图谱、智能体记忆 | +| 🏠 **本地优先** | 零数据离开本机 — SQLite + Tantivy + tree-sitter,无需云端 | +| 🔍 **混合检索** | BM25 全文 + FTS5 技能搜索 + 纯 SQL 向量搜索(`cosine_similarity` UDF),零 ML 运行时依赖 | + +> [完整 71 个 Tool 矩阵 → docs/guides/mcp-integration.md](docs/guides/mcp-integration.md) + +--- + +## 🔧 技术栈 + +| 组件 | 技术 | +|:---|:---| +| 终端 UI | ratatui | +| 全文检索 | Tantivy (BM25) | +| 语义检索 | SQLite BLOB + `cosine_similarity` UDF | +| 代码解析 | tree-sitter (Rust/Python/TS/Go) | +| 关系存储 | SQLite (WAL 模式, OpLog 审计) | +| 协议 | Model Context Protocol (stdio) | + +--- + +## 📁 项目结构 + +``` +devbase/ +├── src/ +│ ├── main.rs # CLI 入口:命令解析与分发 +│ ├── tui/ # 终端仪表盘(ratatui) +│ │ # 多仓库导航、跨仓库搜索、安全同步预览 +│ ├── mcp/ # MCP Server(71 个工具,stdio 通信) +│ │ # 人类与 AI 的统一接口层 +│ ├── registry/ # 仓库注册表:Git 状态、健康检查、批量同步 +│ ├── index/ # Tantivy 全文索引 + SQLite 向量索引 +│ │ # 混合检索核心,BM25 + cosine 向量评分 +│ ├── vault/ # PARA 笔记系统:双向链接、BFS 图遍历 +│ ├── skill/ # Skill 生命周期:发现 → 安装 → 执行 → 评分 → 发布 +│ │ # 自动封装项目为 AI 可调用的 Skill +│ ├── workflow/ # YAML 编排引擎:5 种 step 类型,拓扑调度 + 并行执行 +│ └── session/ # 智能体会话生命周期 + 向量记忆持久化 +├── docs/ +│ ├── architecture/ # 架构文档总览 +│ └── guides/ # 集成指南(Claude Code / 5ire / Kimi CLI) +├── scripts/ +│ ├── install.ps1 # Windows 一键安装 +│ ├── install.sh # Linux/macOS 一键安装 +│ └── devbase-claude.ps1 # Claude Code 一键启动器 +└── README.md +``` + +### 核心设计 + +**三层架构**: +1. **交互层** — TUI 仪表盘 + MCP Server + Workflow 引擎(人类与 AI 的接口) +2. **编译层** — 感知(tree-sitter/Tantivy/Git)→ 知识(图谱/向量/关系)→ 策略(同步/工作流/健康守卫) +3. 
**可靠层** — SQLite WAL 并发安全 + 索引健康检测 + OpLog 全操作审计 + +> 可靠性红线:所有 Registry 写入必须留下不可变审计痕迹(OpLog);Schema 迁移前自动生成快照。详见 [docs/architecture/overview.md](docs/architecture/overview.md)。 + +--- + +## 🚀 快速开始 + +```powershell +# Windows 一行安装 +irm https://raw.githubusercontent.com/juice094/devbase/main/scripts/install.ps1 | iex + +# 或下载预编译二进制(~8.7 MB) +# https://github.com/juice094/devbase/releases/tag/v0.20.1 +``` + +```bash +# Linux / macOS +curl -fsSL https://raw.githubusercontent.com/juice094/devbase/main/scripts/install.sh | bash + +# 基础工作流 +devbase scan . --register # 1. 扫描并注册工作区 +devbase tui # 2. 打开仪表盘 +devbase mcp # 3. 启动 MCP 服务端(供 AI 调用) +``` + +**AI 助手配置** — 添加到 `claude_desktop_config.json` 或 `~/.kimi/mcp.json`: +```json +{ "mcpServers": { "devbase": { "command": "devbase", "args": ["mcp"] } } } +``` + +--- + +## 🤝 参与贡献 + +详见 [CONTRIBUTING.md](CONTRIBUTING.md) — 添加 MCP 工具、Skill Schema、构建模式说明。快速验证: + +```bash +cargo build --release +cargo test --all-targets +cargo clippy --all-targets -- -D warnings +``` + +--- + +## 📄 许可证 + +双许可证:[AGPL-3.0+](LICENSE) 开源 / [商业授权](LICENSE-COMMERCIAL.md) 闭源使用。联系:`juice094@protonmail.com`。 + +--- + +
+ +**[⭐ Star](https://github.com/juice094/devbase) · [🐛 Issues](https://github.com/juice094/devbase/issues) · [🤝 Contribute](CONTRIBUTING.md)** + +
diff --git a/scripts/Sync-WorkspaceJunctions.ps1 b/scripts/Sync-WorkspaceJunctions.ps1 new file mode 100644 index 0000000..204bba7 --- /dev/null +++ b/scripts/Sync-WorkspaceJunctions.ps1 @@ -0,0 +1,66 @@ +# Sync-WorkspaceJunctions.ps1 +# 自动维护 Obsidian Vault ↔ workspace / dev 的 NTFS Junction +# 用于 OpenClaw heartbeat 或手动执行 +# +# ⚠️ 不在 workspace 内部创建 junction → dev/ +# 原因: 会把 62GB 代码仓库暴露进 workspace,破坏 syncthing/备份/搜索 + +$ErrorActionPreference = "Stop" + +$Junctions = @( + @{ + Path = "$env:USERPROFILE\Documents\Obsidian Vault\80-Gray" + Target = "$env:USERPROFILE\.kimi_openclaw\workspace" + Name = "Obsidian/80-Gray → workspace" + }, + @{ + Path = "$env:USERPROFILE\Documents\Obsidian Vault\90-Code" + Target = "$env:USERPROFILE\dev" + Name = "Obsidian/90-Code → dev" + } +) + +$Issues = @() +$Fixed = @() + +foreach ($J in $Junctions) { + $exists = Test-Path $J.Path + $isJunction = (Get-Item $J.Path -Force -ErrorAction SilentlyContinue).Attributes -band [System.IO.FileAttributes]::ReparsePoint + + if (-not $exists) { + # 缺失 → 创建 + try { + New-Item -Path $J.Path -ItemType Junction -Target $J.Target -Force | Out-Null + $Fixed += "CREATED: $($J.Name)" + } catch { + $Issues += "FAILED: $($J.Name) — $_" + } + } elseif (-not $isJunction) { + # 存在但不是 Junction → 报告异常 + $Issues += "NOT-JUNCTION: $($J.Name) — exists as regular directory" + } else { + # 存在且是 Junction → 验证可访问性 + $reachable = Test-Path "$($J.Path)\*" + if (-not $reachable) { + $Issues += "BROKEN: $($J.Name) — junction exists but target unreachable" + } + } +} + +# 输出 +if ($Fixed.Count -gt 0) { + Write-Host "=== FIXED ===" -ForegroundColor Green + $Fixed | ForEach-Object { Write-Host " $_" } +} + +if ($Issues.Count -gt 0) { + Write-Host "=== ISSUES ===" -ForegroundColor Yellow + $Issues | ForEach-Object { Write-Host " $_" } +} + +if ($Fixed.Count -eq 0 -and $Issues.Count -eq 0) { + Write-Host "All junctions healthy." 
-ForegroundColor Green +} + +# 返回状态码供 heartbeat 判断 +if ($Issues.Count -gt 0) { exit 1 } else { exit 0 } From 2140c76ae9e1916c22245039d6b7b2e548d9444a Mon Sep 17 00:00:00 2001 From: juice094 Date: Sat, 13 Jun 2026 22:31:15 +0800 Subject: [PATCH 09/11] docs: structural overhaul of project docs and GitHub facade - Sync all README/AGENTS/CONTRIBUTING metrics to v0.20.1 (Schema v36, 71 tools, 605 tests) - Rewrite docs/README.md as canonical documentation navigation hub - Fix broken internal links and tier distribution (5 stable / 58 beta / 8 experimental) - Add missing stable tool docs (query_repos, vault_read, project_context) - Update GitHub templates (PR, bug, feature), SECURITY, SUPPORT, CODE_OF_CONDUCT - Add standard repo files: FUNDING.yml, dependabot.yml, release.yml, .gitattributes, .editorconfig - Fix server.json tier classification and complete tool list - Remove outdated docs/AGENTS-full.md to Recycle Bin --- .claude/CLAUDE.md | 186 +-- .editorconfig | 28 + .gitattributes | 16 + .github/FUNDING.yml | 7 + .github/ISSUE_TEMPLATE/bug_report.md | 43 +- .github/ISSUE_TEMPLATE/config.yml | 3 + .github/ISSUE_TEMPLATE/feature_request.md | 26 +- .github/PULL_REQUEST_TEMPLATE.md | 37 +- .github/dependabot.yml | 24 + .github/release.yml | 32 + AGENTS.md | 764 ++++++--- CHANGELOG.md | 1388 ++++++++--------- CODE_OF_CONDUCT.md | 97 +- CONTRIBUTING.md | 16 +- KNOWN_ISSUES.md | 16 +- README.md | 293 ++-- SECURITY.md | 4 +- SUPPORT.md | 5 +- docs/AGENTS-full.md | 705 --------- docs/README.md | 126 +- docs/ROADMAP.md | 6 +- docs/guides/README.md | 2 + docs/guides/mcp-integration.md | 20 +- docs/reference/README.md | 11 +- docs/reference/mcp-tools.md | 33 +- docs/reference/stable-tools/README.md | 6 +- .../reference/stable-tools/project_context.md | 109 ++ docs/reference/stable-tools/query_repos.md | 96 ++ docs/reference/stable-tools/vault_read.md | 75 + server.json | 386 ++--- smithery.yaml | 5 +- 31 files changed, 2346 insertions(+), 2219 deletions(-) create mode 100644 
.editorconfig create mode 100644 .gitattributes create mode 100644 .github/FUNDING.yml create mode 100644 .github/dependabot.yml create mode 100644 .github/release.yml delete mode 100644 docs/AGENTS-full.md create mode 100644 docs/reference/stable-tools/project_context.md create mode 100644 docs/reference/stable-tools/query_repos.md create mode 100644 docs/reference/stable-tools/vault_read.md diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 3aa0163..5b1ad04 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -1,93 +1,93 @@ -# devbase — Cognitive Anchor - -> **Purpose**: This file is designed to survive context compression. It contains -> immutable facts and current state that every AI session must know before -> working on this project. If you are reading this after a context reset, -> treat this as your primary source of truth. - ---- - -## Immutable Facts(不可变事实) - -| ID | Fact | Source | Status | -|----|------|--------|--------| -| F-001 | Version | `Cargo.toml` | **v0.20.1** | -| F-002 | Edition | `Cargo.toml` | **Rust 2024** | -| F-003 | Test Coverage | CI | **494 passed, 0 failed, 5 ignored** | -| F-004 | Production Unwrap | Architecture Invariants | **0** (G5 rule enforced) | -| F-005 | MCP Tools | `src/mcp/mod.rs` | **71** (5 Stable / 62 Beta / 4 Experimental) | -| F-006 | Schema Version | `registry/migrate.rs` | **v36** | -| F-007 | Entities Table | Schema v21+ | **唯一真相源** (`repos` 表已删除) | -| F-008 | SQLite Mode | `storage.rs` | **WAL mode** | -| F-009 | Clippy | CI | **`-D warnings` 全绿** | -| F-010 | Release Assets | GitHub Releases | **Linux + Windows x64** 预编译二进制 | - -## 架构红线(Architecture Guardrails) - -- **RF-1**: 无裸 `init_db()` 调用,全部使用 `StorageBackend` 注入 -- **RF-2**: `TempStorageBackend` 用于测试隔离(禁止 `DEVBASE_DATA_DIR` 竞态) -- **RF-3**: `entities` 表是唯一真相源 -- **RF-4**: 二进制上下文 ≤ 1MB -- **RF-5**: 模块间无循环依赖 -- **RF-6**: 生产代码零 `unwrap`/`expect`/`panic`(测试除外) -- **RF-7**: 路径输出必须脱敏(`sanitize_path()` 掩码 home 目录) - -## 当前上下文(Current Context) - -| 属性 | 
值 | -|------|-----| -| 默认分支 | `main` | -| 最新 Release | `v0.20.1` (2026-05-17) | -| 当前 Phase | Phase 1 Production Hardening ✅ 完成 | -| 下一 Phase | Phase 12 — v0.21.0 "External Capability Grafting" | -| 活跃 PR | 无(PR #55 已合并) | - -## 已知架构 Gaps(不可与 Immutable Facts 混淆) - -这些是**待实现**的能力,不是 bug: - -| Gap | 影响 | 计划版本 | 状态 | -|-----|------|----------|------| -| ~~`relations` 表零生产读取路径~~ | ~~统一实体模型的图遍历能力未暴露~~ | ~~v0.21.0~~ | **已完成** — `devkit_relation_store/query/delete` 已存在,`project_context` 已读取 | -| ~~Workflow 引擎零 MCP 暴露~~ | ~~AI 无法发现/触发工作流~~ | ~~v0.21.0~~ | **已完成** — `devkit_workflow_list/run/status` 已存在 | -| ~~`project_context` 不完整~~ | ~~缺少 relations/limits/skills/workflows~~ | ~~v0.21.0~~ | **已完成** — 已补充 `known_limits` + `skills` | -| 31/68 MCP 工具缺少调用测试 | 回归风险 | v0.21.0 | 待评估 | -| ~~`mcp/tools/repo.rs` 2376 行~~ | ~~维护负担~~ | ~~v0.21.0~~ | **已完成** — 已拆分为 `tools/` 目录,`repo.rs` 现 730 行 | -| ~~`init_db_at` 1214 行~~ | ~~迁移函数过大~~ | ~~v0.21.0~~ | **已完成** — 已拆分为 `registry/migrate.rs`(503 行)+ `repo.rs` + `vault.rs` + `links.rs` | - -## 防失忆校验清单(每次会话启动) - -- [ ] 已读取本文件(`devbase/.claude/CLAUDE.md`) -- [ ] 已确认 `Cargo.toml` 版本与上表 F-001 一致 -- [ ] 如果 handoff 文档说"未完成",确认是新环境问题还是全局阻塞 -- [ ] 如果修改 Schema,已更新 `registry/migrate.rs` 和 `SCHEMA_DDL` - -## 快速入口 - -| 你想做什么 | 命令 | -|-----------|------| -| 运行测试 | `cargo test --all-targets` | -| 检查 clippy | `cargo clippy --all-targets -D warnings` | -| 检查格式化 | `cargo fmt --check` | -| 运行 invariant checks | `scripts/invariant-checks/run-checks.ps1` | -| 启动 MCP Server | `cargo run -- mcp` | -| 启动 TUI | `cargo run -- tui` | -| 扫描当前目录 | `devbase scan . 
--register` | -| 索引仓库 | `devbase index` | - -## 关键文件映射 - -| 概念 | 文件 | -|------|------| -| 架构决策 | `docs/architecture/` | -| 稳定工具文档 | `docs/reference/stable-tools/` | -| 快速开始 | `docs/guides/quickstart.md` | -| MCP 集成指南 | `docs/guides/mcp-integration.md` | -| 变更日志 | `CHANGELOG.md` | -| Agent 简报 | `AGENTS.md` | -| 贡献指南 | `CONTRIBUTING.md` | - ---- - -**Last Updated**: 2026-05-20 by Claude Opus 4.7 -**Version**: v0.20.1 +# devbase — Cognitive Anchor + +> **Purpose**: This file is designed to survive context compression. It contains +> immutable facts and current state that every AI session must know before +> working on this project. If you are reading this after a context reset, +> treat this as your primary source of truth. + +--- + +## Immutable Facts(不可变事实) + +| ID | Fact | Source | Status | +|----|------|--------|--------| +| F-001 | Version | `Cargo.toml` | **v0.20.1** | +| F-002 | Edition | `Cargo.toml` | **Rust 2024** | +| F-003 | Test Coverage | CI | **605 passed, 0 failed, 7 ignored** | +| F-004 | Production Unwrap | Architecture Invariants | **0** (G5 rule enforced) | +| F-005 | MCP Tools | `src/mcp/mod.rs` | **71** (5 Stable / 58 Beta / 8 Experimental) | +| F-006 | Schema Version | `registry/migrate.rs` | **v36** | +| F-007 | Entities Table | Schema v21+ | **唯一真相源** (`repos` 表已删除) | +| F-008 | SQLite Mode | `storage.rs` | **WAL mode** | +| F-009 | Clippy | CI | **`-D warnings` 全绿** | +| F-010 | Release Assets | GitHub Releases | **Linux + Windows x64** 预编译二进制 | + +## 架构红线(Architecture Guardrails) + +- **RF-1**: 无裸 `init_db()` 调用,全部使用 `StorageBackend` 注入 +- **RF-2**: `TempStorageBackend` 用于测试隔离(禁止 `DEVBASE_DATA_DIR` 竞态) +- **RF-3**: `entities` 表是唯一真相源 +- **RF-4**: 二进制上下文 ≤ 1MB +- **RF-5**: 模块间无循环依赖 +- **RF-6**: 生产代码零 `unwrap`/`expect`/`panic`(测试除外) +- **RF-7**: 路径输出必须脱敏(`sanitize_path()` 掩码 home 目录) + +## 当前上下文(Current Context) + +| 属性 | 值 | +|------|-----| +| 默认分支 | `main` | +| 最新 Release | `v0.20.1` (2026-05-17) | +| 当前 Phase | Phase 1 Production Hardening 
✅ 完成 | +| 下一 Phase | Phase 12 — v0.21.0 "External Capability Grafting" | +| 活跃 PR | 无(PR #55 已合并) | + +## 已知架构 Gaps(不可与 Immutable Facts 混淆) + +这些是**待实现**的能力,不是 bug: + +| Gap | 影响 | 计划版本 | 状态 | +|-----|------|----------|------| +| ~~`relations` 表零生产读取路径~~ | ~~统一实体模型的图遍历能力未暴露~~ | ~~v0.21.0~~ | **已完成** — `devkit_relation_store/query/delete` 已存在,`project_context` 已读取 | +| ~~Workflow 引擎零 MCP 暴露~~ | ~~AI 无法发现/触发工作流~~ | ~~v0.21.0~~ | **已完成** — `devkit_workflow_list/run/status` 已存在 | +| ~~`project_context` 不完整~~ | ~~缺少 relations/limits/skills/workflows~~ | ~~v0.21.0~~ | **已完成** — 已补充 `known_limits` + `skills` | +| MCP 工具测试覆盖不均 | 部分 Beta 工具仅有 smoke test | v0.21.0 | 待评估 | +| ~~`mcp/tools/repo.rs` 2376 行~~ | ~~维护负担~~ | ~~v0.21.0~~ | **已完成** — 已拆分为 `tools/` 目录 | +| ~~`init_db_at` 1214 行~~ | ~~迁移函数过大~~ | ~~v0.21.0~~ | **已完成** — 已拆分为 `registry/migrate.rs` + 子模块 | + +## 防失忆校验清单(每次会话启动) + +- [ ] 已读取本文件(`devbase/.claude/CLAUDE.md`) +- [ ] 已确认 `Cargo.toml` 版本与上表 F-001 一致 +- [ ] 如果 handoff 文档说"未完成",确认是新环境问题还是全局阻塞 +- [ ] 如果修改 Schema,已更新 `registry/migrate.rs` 和 `SCHEMA_DDL` + +## 快速入口 + +| 你想做什么 | 命令 | +|-----------|------| +| 运行测试 | `cargo test --all-targets` | +| 检查 clippy | `cargo clippy --all-targets -- -D warnings` | +| 检查格式化 | `cargo fmt --check` | +| 运行 invariant checks | `scripts/invariant-checks/run-checks.ps1` | +| 启动 MCP Server | `cargo run -- mcp` | +| 启动 TUI | `cargo run -- tui` | +| 扫描当前目录 | `devbase scan . 
--register` | +| 索引仓库 | `devbase index` | + +## 关键文件映射 + +| 概念 | 文件 | +|------|------| +| 架构决策 | `docs/architecture/` | +| 稳定工具文档 | `docs/reference/stable-tools/` | +| 快速开始 | `docs/guides/quickstart.md` | +| MCP 集成指南 | `docs/guides/mcp-integration.md` | +| 变更日志 | `CHANGELOG.md` | +| Agent 简报 | `AGENTS.md` | +| 贡献指南 | `CONTRIBUTING.md` | + +--- + +**Last Updated**: 2026-06-13 +**Version**: v0.20.1 diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..a2bfa0d --- /dev/null +++ b/.editorconfig @@ -0,0 +1,28 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_style = space +indent_size = 4 +max_line_length = 100 + +[*.{rs,toml}] +indent_size = 4 +max_line_length = 100 + +[*.{md,yml,yaml,json}] +indent_size = 2 +max_line_length = 100 + +[*.{ps1}] +end_of_line = crlf +indent_size = 4 + +[*.sh] +indent_size = 2 + +[Makefile] +indent_style = tab diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b1931f9 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,16 @@ +# Line ending normalization for cross-platform collaboration +*.sh eol=lf +*.ps1 eol=crlf + +# Binary files — do not treat as text +*.png binary +*.jpg binary +*.jpeg binary +*.gif binary +*.ico binary +*.db binary +*.db-wal binary +*.db-journal binary +*.zip binary +*.tar binary +*.gz binary diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..6d4b83c --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,7 @@ +# Funding and sponsorship configuration for devbase +# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/displaying-a-sponsor-button-in-your-repository + +github: [juice094] +custom: + - "https://github.com/juice094/devbase" + - "mailto:juice094@protonmail.com" diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 4f5580a..8bd80ae 100644 --- 
a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -4,28 +4,39 @@ about: Create a report to help us improve devbase title: '[BUG] ' labels: bug assignees: '' - --- -**Describe the bug** -A clear and concise description of what the bug is. +## Bug Description + + + +## Reproduction Steps + + -**To Reproduce** -Steps to reproduce the behavior: 1. Run `...` -2. Click on '...' +2. Run `...` 3. See error -**Expected behavior** -A clear and concise description of what you expected to happen. +## Expected Behavior + + + +## Actual Behavior + + + +```text +# 请粘贴完整的错误日志或命令输出 +``` + +## Environment -**Environment (please complete the following information):** - - OS: [e.g. Windows 11, macOS 14, Ubuntu 22.04] - - devbase version: [output of `devbase --version`] - - Rust version: [output of `rustc --version`] +- OS: [e.g. Windows 11, macOS 14, Ubuntu 22.04] +- devbase version: [output of `devbase --version`] +- Rust version: [output of `rustc --version`] +- Installation method: [cargo install / install.ps1 / install.sh / GitHub release] -**Screenshots / Logs** -If applicable, add screenshots or console output to help explain your problem. +## Additional Context -**Additional context** -Add any other context about the problem here. + diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 96f6514..09dadaf 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -6,3 +6,6 @@ contact_links: - name: Question or discussion url: https://github.com/juice094/devbase/discussions about: For Q&A, architecture debates, or show-and-tell, use GitHub Discussions. + - name: Commercial licensing inquiry + url: mailto:juice094@protonmail.com + about: For commercial / dual-licensing questions, please email the maintainer. 
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 9ebf1ca..40bbb56 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -4,20 +4,24 @@ about: Suggest an idea for devbase title: '[Feature] ' labels: enhancement assignees: '' - --- -**Is your feature request related to a problem? Please describe.** -A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] +## Problem Statement + + + +## Proposed Solution + + + +## Use Case + + -**Describe the solution you'd like** -A clear and concise description of what you want to happen. +## Alternatives Considered -**Describe alternatives you've considered** -A clear and concise description of any alternative solutions or features you've considered. + -**Use case** -Who would benefit from this feature? How would they use it? +## Additional Context -**Additional context** -Add any other context, mockups, or references about the feature request here. 
+ diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 6c7a08c..e7517a9 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,26 +1,51 @@ ## Summary - + + +## Motivation / Context + + ## Type of Change - [ ] Bug fix (non-breaking) - [ ] New feature - [ ] Breaking change -- [ ] Documentation +- [ ] Documentation only - [ ] Performance improvement - [ ] Refactoring (no behavior change) +- [ ] Test-only change +- [ ] Build / CI / tooling + +## Testing + + -## Checklist +```bash +# 本地验证命令示例 +cargo test --all-targets +cargo clippy --all-targets -D warnings +cargo fmt --check +scripts/invariant-checks/run-checks.ps1 # Windows +``` - [ ] `cargo test --all-targets` passes locally - [ ] `cargo clippy --all-targets -D warnings` passes - [ ] `cargo fmt --check` passes - [ ] New code has no production `unwrap`/`expect`/`panic` (test code exempt) -- [ ] Schema changes include migration in `src/registry/migrate.rs` +- [ ] Schema changes include migration in `src/registry/migrate.rs` **and** `src/registry/test_helpers.rs` - [ ] New MCP tools include tests in `src/mcp/tests.rs` -- [ ] README / AGENTS.md updated if user-facing behavior changed +- [ ] README / AGENTS.md / docs/README.md updated if user-facing behavior changed +- [ ] `scripts/invariant-checks/run-checks.ps1` passes (Windows) + +## Breaking Changes / Migration Notes + + ## Related Issues - + + +## Additional Notes + + diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..43963e9 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,24 @@ +version: 2 +updates: + - package-ecosystem: "cargo" + directory: "/" + schedule: + interval: "weekly" + day: "monday" + open-pull-requests-limit: 10 + labels: + - "dependencies" + - "rust" + commit-message: + prefix: "chore(deps)" + include: "scope" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + labels: + - "dependencies" + - 
"ci" + commit-message: + prefix: "chore(ci)" diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 0000000..addbc20 --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,32 @@ +# GitHub Release notes configuration +# https://docs.github.com/en/repositories/releasing-projects-on-github/automatically-generated-release-notes + +changelog: + exclude: + labels: + - ignore-for-release + - "chore" + authors: + - dependabot[bot] + categories: + - title: ⚠️ Breaking Changes + labels: + - breaking-change + - title: 🚀 New Features + labels: + - enhancement + - feature + - title: 🐛 Bug Fixes + labels: + - bug + - title: 🛠️ Maintenance + labels: + - refactor + - perf + - test + - title: 📚 Documentation + labels: + - documentation + - title: Other Changes + labels: + - "*" diff --git a/AGENTS.md b/AGENTS.md index 613efdf..c5385f2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,258 +1,506 @@ -# Agent 环境指引 - -`devbase` 是 **本地情境编译器(Local Context Compiler)** —— AI agent 在本地数字世界中的海马体。 - -> 它将本地数字资产的原始数据(代码库、笔记、Skill、工作流)编译为 AI 可决策的结构化情境,不负责思考,不负责执行,只负责感知、编码、持久化、检索。 - -- **当前阶段**:阶段十一 — v0.20.0 已发布(知识完备性) -- **当前版本**:v0.20.1(Schema 36,71 MCP tools,495 tests) -- **已完成里程碑**:Registry God Object 完全拆解(10 子模块提取)+ 18 workspace crates 提取 + MCP Python SDK 1.16.0 兼容修复 + repo.rs trait 化 + flaky 测试根治(RF-2.1/2.2/2.3)+ 许可证迁移 + health 性能优化(-44%)+ index skip-embeddings + batch encoding 实验 + RF-6 清零 + 架构治理文档(ADR/不变量清单)+ Tantivy BM25 代码符号搜索(P1)+ AppContext 职责拆分 Phase 1/2(storage.rs 860→430 行)+ 架构不变量 CI(G5/T11/T12)+ Embedding 多后端(Candle/Ollama 配置切换, P3)+ EnvVersionCache 扩展(9 工具链检测, P4)+ **v0.16.0 Agent Contexts(P1/P2/P3)**:`agent_contexts`/`agent_memories`/`context_entity_links` Schema + 9 个 Session MCP tools + Context-aware Skill Runtime(`DEVBASE_ACTIVE_CONTEXT` 注入)+ **v0.16.1 Workflow-Session Binding**:`workflow_executions.context_id` + 执行自动绑定 Active Context + **v0.17.0 Embedding Externalization**:`embedding` 从 default features 移除(Candle/Ollama 降级为 opt-in `llm-backend`)+ Schema 
34 向量存储 + `cosine_similarity` SQLite UDF + `devkit_session_recall` / `devkit_session_index`(60 tools)+ **v0.18.0 ClaudeCode Integration**:`devkit_project_brief`(Markdown 项目简报)+ `devkit_impact_analysis`(修改影响范围分析)+ `devkit_session_export` / `devkit_session_import` + `scripts/devbase-claude.ps1` 启动器(自动注入 `.claude/CLAUDE.md`)+ RFC `docs/RFC/claudecode-workflow-integration.md`(64 tools)+ **v0.18.0 发布收尾**:PR 合并 + 双平台二进制构建 + GitHub Release + 根目录治理 + 世界模型战略认知沉淀(Vault + AGENTS 双向联动)+ NotebookLM 生态消化(5 项目注册)+ GreptimeDB 互补分析 + **v0.19.0 知识基础设施硬化**:SQLite WAL 默认启用 + `devkit_index_health`(Beta)+ Vault 导出(`devkit_vault_export`)+ Redis ADR 决策(放弃引入)+ **v0.20.0 知识完备性**:Vault 双向链接 BFS 图遍历(`devkit_vault_graph` 扩展)+ Vault Git-based 历史追踪(`devkit_vault_history`,第 67 个 tool)+ 混合检索质量监控(`devkit_search_quality`,第 68 个 tool,`HybridSearchMetrics`)+ Block 引用支持(`WikiLink.anchor`:`[[note#heading]]` / `[[note#^block-id]]`)+ 性能回归基线(`#[ignore]` 1k/10k 阈值测试)+ 客户端无关原则(Client-Agnostic Principle)落地 + `skill sync` 泛化接口(零硬编码客户端路径) -- **核心方向**:让 Kimi CLI 在调用文件工具之前,先通过 devbase 获得"该读哪些文件、为什么读、它们之间的关系" -- **本质分析**:见 `vault/99-Meta/devbase-essence-analysis-20260430.md` 与 `docs/architecture/redefinition.md` -- **设计文档**: - - [`docs/architecture/workflow-dsl.md`](docs/architecture/workflow-dsl.md) — Workflow DSL 规范 - - [`docs/architecture/workspace-as-schema.md`](docs/architecture/workspace-as-schema.md) — 统一实体模型设计 - - [`docs/RFC/agent-memory-vector-storage.md`](docs/RFC/agent-memory-vector-storage.md) — v0.17.0 Agent Memory 向量存储 RFC(Embedding 职责外迁设计) - - [`docs/guides/mcp-integration-guide.md`](docs/guides/mcp-integration-guide.md) — MCP 集成指南 - - [`docs/README.md`](docs/README.md) — 完整文档导航 - -Skill Runtime 全生命周期已落地(含依赖管理 Schema v15),Schema v16 统一实体模型(entities/relations)已落地,Skill 自动封装(`discover`)已落地。 - -- **技术栈**:Rust 2024, SQLite, tokio, ratatui, git2, reqwest, tantivy -- **Registry DB**:`%LOCALAPPDATA%\devbase\registry.db`(轻量索引,用户本地,永不进入版本控制) -- **Workspace**:`%LOCALAPPDATA%\devbase\workspace/` —— 文件系统 = 
source of truth - - `vault/` —— PARA 结构:00-Inbox, 01-Projects, 02-Areas, 03-Resources, 04-Archives, 99-Meta - - `assets/` —— 二进制资源 -- **MCP Server**:stdio only,**71 个 tools**(含 7 个 vault tools + 8 个代码分析工具 + 5 个 embedding/搜索工具 + 5 个 Skill Runtime tools + 3 个 Workflow/评分 tools + 1 个报告工具 + 1 个 arXiv 工具 + 2 个 KnownLimit tools + 3 个 Relation tools + 11 个 Agent Context tools + 2 个 ClaudeCode 集成工具 + 1 个 streaming index 工具 + 1 个 oplog 工具 + 1 个 Index Health 工具 + 1 个 Search Quality 工具 + 1 个 Evaluate 工具 + 1 个 DocumentConvert 工具 + 1 个 Ontology Import 工具 + 1 个 Skill Sync 工具);配置见 `mcp.json` -- **Kimi CLI 集成**:MCP server 已通过 `kimi mcp add` 注册,端到端验证通过(`kimi --print` 成功调用 `devkit_health`);项目级 skill 位于 `.kimi/skills/devbase-project/SKILL.md` -- **统一节点模型**:`core::node::{Node, NodeType, Edge}` —— GitRepo / VaultNote / Asset / ExternalLink -- **当前测试**:476 lib passed / 0 failed / 5 ignored + 7/7 integration passed + 11/11 workflow passed(共 494) -- **编译状态**:0 warning / 0 vulnerabilities(`cargo audit` 干净,除上游 `tokei` 的 `RUSTSEC-2020-0163`) -- **Workspace 结构**:`crates/` 目录已启用,19 个零耦合模块已提取为独立 crate(`devbase-symbol-links`, `devbase-sync-protocol`, `devbase-core-types`, `devbase-syncthing-client`, `devbase-vault-frontmatter`, `devbase-vault-wikilink`, `devbase-workflow-interpolate`, `devbase-workflow-model`, `devbase-registry-health`, `devbase-registry-metrics`, `devbase-registry-workspace`, `devbase-embedding`, `devbase-skill-runtime-types`, `devbase-skill-runtime-parser`, `devbase-registry-entity`, `devbase-registry-relation`, `devbase-registry-call-graph`, `devbase-registry-dead-code`, `devbase-registry-code-symbols`) -- **Workflow Engine**:YAML 解析 + 拓扑调度 + batch 并行执行 + 5 种 step 类型(skill/subworkflow/parallel/condition/loop) -- **NLQ 自然语言查询**:TUI `[:]` 触发 embedding 语义搜索,fallback 降级文本搜索 -- **Mind Market 评分**:success_rate / usage_count / rating(0-5),`skill recalc-scores/top/recommend` - -## 关键约定 - -1. **文件操作**:读取用 `ReadFile`,搜索用 `Grep`/`Glob`,修改用 `StrReplaceFile`,整文件重写用 `WriteFile` -2. 
**Shell**:Windows PowerShell;用 `;` 分隔命令 -3. **Git**:提交前必须通过 `cargo test --all-targets` + `cargo clippy --all-targets -D warnings` + `cargo fmt --check` -4. **Schema 迁移**:`PRAGMA user_version` 安全升级;升级前自动调用 `backup::auto_backup_before_migration()` - -## 安全原则 - -### 本地优先(Local-First) - -- **Registry DB** 始终存储在用户的本地配置目录(`dirs::config_dir()/devbase/`),绝不向远程传输 -- **代码内容** 不会被上传到任何云端服务(除非用户显式配置 GitHub token 用于 stars 查询) -- **MCP Server** 仅通过 stdio 本地进程通信,不暴露网络端口 - -### 客户端无关(Client-Agnostic) - -> devbase 的核心能力(编排、注册、索引、搜索、同步)必须在不依赖任何特定 AI 客户端的前提下独立运行。 - -- ✅ **允许**:向通用目录输出数据,由用户自行分发给任意客户端(如 `skill sync --output-dir ./plans`) -- ✅ **允许**:实现标准协议(MCP)供任意客户端连接 -- ❌ **禁止**:核心能力硬编码特定客户端的路径、API、或配置格式(如 `C:\Users\xxx\.claude`) -- ❌ **禁止**:核心能力的可用性取决于某个客户端是否安装 -- 🟡 **适配层**:`scripts/claude/`、`docs/clients/` 等目录下的客户端适配脚本属于配套示例,不归入核心版本控制 - -### 凭证管理 - -- GitHub token、LLM API key 存储在本地 `config.toml` 中 -- `config.toml` 位于用户配置目录,**不在项目工作目录**,因此不会被意外 `git commit` -- 默认配置模板中的 token 字段使用占位符 ``,避免真实 token 格式泄露 -- `.gitignore` 已覆盖 `*.db`、`.devbase/`、`.env*`、`*.local.toml` - -### 审计与备份 - -- 所有 `scan`/`sync`/`health` 操作自动写入 OpLog(SQLite `oplog` 表) -- Schema 迁移前自动生成 `backup-YYYYMMDD-HHMMSS.db` 快照 -- Registry 支持 `export`/`import` 用于用户自主备份 - -## 许可证策略 - -- **主许可证**: AGPL-3.0-or-later (`LICENSE`) -- **商业授权**: 双许可模式,闭源/专有 SaaS 使用需联系作者 (`LICENSE-COMMERCIAL.md`) -- **Cargo.toml**: `license = "AGPL-3.0-or-later"` -- **SPDX 头**: 新增源文件应在顶部包含 AGPL-3.0 声明(见 `LICENSE` 末尾 "How to Apply" 部分) - -## 架构状态(Wave 15b 完成) - -| 维度 | 状态 | -|------|------| -| 代码质量 | `rustfmt.toml` + `cargo fmt` + `clippy -D warnings` 全绿 | -| 模块拆分 | `sync`→5 / `registry`→11 / `mcp` 测试分离 / `search`→hybrid / `oplog_analytics` / `symbol_links` / **workspace: 3 crates extracted** | -| 库/二进制 | `src/lib.rs` 导出全部 **30+** 个模块;`src/main.rs` 仅 CLI 入口 | -| TUI 架构 | `render/` 6 子模块 + `theme.rs` Design Token + `layout.rs` 响应式引擎 | -| 数据层 | Schema v23: `repos`/`vault_notes`/`papers`/`workflows`/`repo_modules_legacy` 表已删除;`entities` 
为唯一数据源;`repo_tags/repo_remotes/repo_health/...` 为独立 JOIN 表(无 FK);仅 `skills` 保留独立表(embedding BLOB) | -| CI/CD | `.github/workflows/ci.yml`:check / test / fmt / clippy on Windows | -| 依赖安全 | `cargo audit` 0 漏洞(除上游 `tokei` 的 `RUSTSEC-2020-0163`) | - -## 架构红线(Architecture Guardrails) - -> 基于第一性原理的工程约束。违反任意一条 = HALT,转交人类裁决或回滚。 -> 规则编号 `RF-XX`(Red-line / Fitness function),带客观测量标准,非主观描述。 - -### RF-1: 依赖注入优于全局状态(Global State Anti-Pattern) - -**理论锚定**:全局可变状态使组件隐式耦合,破坏可测试性与可复用性(参考:Pure Function / DI 原则)。 - -**规则**: -- 禁止新增 `dirs::data_local_dir()` / `std::env::var_os` 硬编码路径。 -- 所有 IO 边界路径(DB、索引、备份、配置)必须通过参数、构造函数或 `trait` 注入。 -- **例外(Grandfathered)**:现有 3 处(`backup_dir`、`db_path`、`index_path`)在重构前不得新增第 4 处。 - -**Fitness Function**: -```bash -# 新增 PR 中不得出现新的全局路径硬编码 -grep -rn "dirs::data_local_dir\|std::env::var_os\|std::env::var(\"LOCALAPPDATA\"" src/ \ - | grep -v "backup.rs\|migrate.rs\|search.rs" -# 预期输出:空 -``` - -### RF-2: 测试密封性(Hermetic Testing) - -**理论锚定**:测试失败必须仅因被测代码缺陷,不因外部因素、测试顺序或并行调度(参考:Google Test Blog — Hermetic Servers)。 - -**规则**: -- 所有测试禁止修改全局进程状态(`std::env::set_var`、`static mut`、全局文件系统句柄)。 -- 文件系统测试必须使用 `tempfile` + 注入式路径,禁止直接操作 `%LOCALAPPDATA%` 或 `~/.config`。 -- Tantivy / SQLite 文件系统测试必须获取 `SEARCH_TEST_LOCK`(或同等级串行化机制)。 - -**子规则(来自 PR #4 教训)**: -- **R2.1 禁止 `DEVBASE_DATA_DIR` 全局注入**:并行测试中 `std::env::set_var("DEVBASE_DATA_DIR", ...)` 导致竞态;必须使用 `TempStorageBackend` 注入式替代。 -- **R2.2 Windows 路径双端规范化**:`TempDir` 可能返回短文件名(`TEMP~1`),而 `dunce::canonicalize` 返回长文件名;路径比较前必须对**双方**调用 `dunce::canonicalize`。 -- **R2.3 `git2` 测试显式身份 + 显式分支**: - - CI runner 无全局 `user.name`/`user.email` → `repo.signature()` 会 panic;必须改用 `git2::Signature::now("Test", "test@example.com")`。 - - `git2::Repository::init` 的默认分支在不同平台可能为 `master` 或 `main`;必须显式 `repo.set_head("refs/heads/main")` 并 commit 到 `"refs/heads/main"`。 - -**Fitness Function**: -```bash -# 高并发下 100% 通过,无 flaky -cargo test --test-threads=16 -``` - -### RF-3: Schema 单一事实来源(Single Source of Truth) - -**理论锚定**:重复信息必然 drift(参考:DRY 
原则 + Evolutionary Architecture 的版本一致性约束)。 - -**规则**: -- `SCHEMA_DDL`(`registry/test_helpers.rs`)与 `migrate.rs` 必须原子同步。 -- 新增表、索引、列必须同时出现在两者中;禁止仅更新其一。 - -**Fitness Function**: -- CI 运行 `test_in_memory_schema_version` + schema 结构比对脚本(可手动运行 `cargo test registry::test_helpers::tests` 验证)。 - -### RF-4: 二进制入口限界(Bounded Context) - -**理论锚定**:CLI 入口应仅做命令分发,业务逻辑应在 lib 模块中(参考:Hexagonal Architecture / Ports & Adapters)。 - -**规则**: -- `main.rs` 行数不得超过 **1000 行**。 -- 新增 CLI 命令必须先拆分为 `commands/` 子模块或独立函数,禁止在 `main.rs` 中堆积业务逻辑。 - -**Fitness Function**: -```bash -# 当前 515 行(Phase 1/2/3 已削减 1003 行),远超目标 -[ $(wc -l < src/main.rs) -le 1000 ] || exit 1 -``` - -### RF-5: 无循环依赖(Acyclic Dependencies) - -**理论锚定**:循环依赖破坏模块化,使增量编译和独立复用不可能(参考:John Lakos — Large-Scale C++ Software Design)。 - -**规则**: -- 禁止模块间双向 `use crate::` 引用。 -- 新增模块必须通过脚本验证无循环(当前已满足,未来 PR 保持)。 - -**Fitness Function**: -```bash -# 文件级双向依赖检测(当前输出应为空) -for f in src/**/*.rs; do - name=$(basename "$f" .rs) - refs=$(grep -o 'use crate::[a-z_]*' "$f" | sed 's/use crate:://') - for r in $refs; do - if [ -f "src/$r.rs" ] && grep -q "use crate::$name\b" "src/$r.rs"; then - echo "CYCLE: $name <-> $r" - fi - done -done -``` - -### RF-7: Workspace 拆分约束(Module Distribution Readiness) - -**理论锚定**:模块能否独立发布是耦合健康度的金标准;不能拆分的模块 = 耦合不健康的模块。 - -**规则**: -- 新增模块若对 devbase 内部其他模块的 `crate::` 引用超过 **5 个**,禁止提取为 workspace crate。 -- 已提取 crate 的重新导出文件(`src/symbol_links.rs` 等)**禁止添加新代码**——顶部有 `RE-EXPORT ONLY` 注释作为守卫。 -- 子 crate 的依赖版本必须与 workspace 统一,禁止独立 bump。 - -**Fitness Function**: -```bash -# 扫描所有 src/*.rs,统计 crate:: 引用数 -for f in src/*.rs; do - count=$(grep -c 'crate::' "$f") - if [ "$count" -gt 15 ]; then - echo "HIGH COUPLING: $f ($count refs)" - fi -done -# 预期输出:空(或仅已标记的高耦合文件如 mcp/tools/repo.rs) -``` - -### RF-6: 生产代码无 panic(Crash-only Software) - -**理论锚定**:Rust 的 `Result` 类型将错误显式化;`unwrap` 是将运行时崩溃隐藏在类型系统背后(参考:Joe Armstrong — Let it crash,但 Rust 中崩溃 = 进程终止,不可接受)。 - -**规则**: -- 生产代码(`src/**/*.rs` 中不在 `#[cfg(test)]` 块内的代码)禁止 
`unwrap()`、`expect()`、`panic!()`。 -- 测试代码不受此限,但鼓励使用 `?` 传播。 - -**Fitness Function**: -```bash -# 生产代码 unwrap 计数(排除 #[cfg(test)] 块及 tests.rs 文件) -for f in $(find src -name "*.rs"); do - test_line=$(grep -n "#\[cfg(test)\]" "$f" | head -1 | cut -d: -f1) - if echo "$f" | grep -qE "tests?\.rs$|_test\.rs$|/tests/"; then continue; fi - if [ -n "$test_line" ]; then - head -n "$((test_line - 1))" "$f" | grep -n "\.unwrap()" - else - grep -n "\.unwrap()" "$f" - fi -done -# 预期输出:空 -``` - -**状态**:🟢 **已完成**(v0.20.1 复核:生产代码 unwrap = 0;此前 1090 为测试模块误统计)。 - -### 架构治理框架(Architecture Governance) - -> 参考:外部架构治理方法论(Kimi 会话 `e9f2965f-b949-46a5-9d7c-afd6d4d9232c`) - -**已制度化实践**: - -| 实践 | devbase 落地形式 | 文档位置 | -|------|-----------------|---------| -| ADR(架构决策记录) | ADR-001(单 crate defer)、ADR-002(batch encoding 回滚) | [`docs/architecture/adr-template.md`](docs/architecture/adr-template.md) | -| 不变量清单(Invariants) | RF-1~RF-7 + 分层模块约束(T01–T12) | [`docs/architecture/invariants.md`](docs/architecture/invariants.md) | -| 模块提取演习 | RF-7 的 5 个 `crate::` 引用阈值 + 已提取 18 workspace crates | 本文件 §RF-7 | -| 三层摘要 | `crates/*/README.md` 要求:一句话 + 一页纸 + 深度链接 | 各 crate README | -| 定期架构回顾 | 每次 Wave 结束时的架构审计(见 `docs/_audit/`) | `docs/_audit/2026-04-26-*.md` | - -**待增强**: -- 三层摘要:部分已提取 crate 的 README 尚未达到"一页纸"标准 -- 定期架构回顾:当前按 Wave(功能迭代周期)触发,建议每 2–4 周增加一次纯架构 review(不看 feature 进度,只看不变量违反和隐式依赖) - ---- - -## 禁止事项 - -- 不得修改 `dev\third_party\*` 外部仓库 -- 不得在没有迁移逻辑的情况下修改 registry schema -- 不得引入已 deprecated 的协议 -- **不得在主仓库引入 Spark/Flink 依赖**(研究性质代码必须置于独立仓库,保持主仓库轻量) -- **不得在任何源码文件中硬编码真实 token、api_key 或密码**(包括注释和测试数据) - -> 完整版(含历史记录、路线图、详细讨论):见 docs/AGENTS-full.md +# Agent 环境指引 + +> 本文件面向不了解项目的 AI coding agent。它汇总了 `devbase` 的架构、构建、测试、安全与开发约定。请在修改代码前先阅读本文件,并遵循其中的红线与检查清单。 + +## 1. 
项目概览 + +**devbase**(v0.20.1)是一个本地优先的开发者工作空间数据库与知识库管理器。它把代码仓库、笔记(Vault)、Skill 与工作流编译成 AI 可推理的结构化情境,核心职责是: + +- **感知**:扫描 Git 仓库、分析代码结构、解析 Vault 笔记。 +- **编码**:把原始资产转化为统一实体模型(`Node`/`Edge`)、图谱关系、向量索引。 +- **持久化**:本地 SQLite Registry + Tantivy 全文/符号索引 + 文件系统 Workspace。 +- **检索**:通过 MCP(Model Context Protocol)向 AI 客户端暴露 71 个工具。 + +项目主页:`https://github.com/juice094/devbase` +许可证:AGPL-3.0-or-later(双许可,商业使用需联系作者)。 + +### 当前关键指标(基于仓库实际内容) + +| 指标 | 数值 | +|------|------| +| 版本 | `0.20.1` | +| Rust Edition | `2024`(要求 rustc 1.95+) | +| Registry Schema | `v36`(`src/registry/migrate.rs`) | +| MCP Tools | **71** 个(`src/mcp/tools/*.rs` 中 `pub struct Devkit*Tool`) | +| 测试函数 | **605** 个(`cargo test --workspace -- --list`) | +| Ignored 测试 | 7 个(`#[ignore]` 在 `src/` 6 个 + `crates/devbase-embedding` 1 个) | +| Workspace Crates | **12** 个(`crates/` 目录) | +| `src/main.rs` 行数 | 833 行(RF-4 限界 1000 行内) | +| Clippy | `-D warnings` / CI `-W warnings` | +| 生产代码 `unwrap` | 0(架构红线 RF-6) | + +> 注意:仓库内原有文档可能写到“18/19 个 crate”“495 tests”“main.rs 515 行”,那是历史快照;本文件以当前文件系统与 `cargo test --workspace -- --list` 的实际输出为准。 + +## 2. 
技术栈与依赖 + +| 用途 | 技术/库 | +|------|---------| +| CLI / 子命令 | `clap` derive | +| 异步运行时 | `tokio`(rt-multi-thread, macros, process, io-util, io-std, sync) | +| 数据库 | `rusqlite` + `r2d2`/`r2d2_sqlite`(WAL 模式,bundled) | +| 全文/符号检索 | `tantivy` | +| Git 操作 | `git2` | +| 代码解析 | `tree-sitter` + 可选 grammar(rust/python/typescript/go) | +| 终端 UI | `ratatui` + `crossterm`(feature `tui`) | +| 文件监控 | `notify`(feature `watch`) | +| HTTP | `reqwest` / `ureq`(embedding crate) | +| 序列化 | `serde`/`serde_json`/`serde_yaml` + `toml` | +| 日志 | `tracing`/`tracing-subscriber` | +| 哈希/并行 | `blake3`、`rayon`、`crossbeam-channel` | +| 构建/测试辅助 | `tempfile`、`assert_cmd`、`predicates`、`criterion`(bench) | + +### Cargo Features + +```toml +default = ["tui", "mcp", "lang-rust", "lang-python", "lang-js-ts", "lang-go"] +``` + +- `tui`:终端仪表盘。 +- `mcp`:MCP Server。 +- `lang-*`:tree-sitter 语言支持。 +- `embedding`:启用 `devbase-embedding` crate(Candle/Ollama),**不在 default**,需显式 `--features embedding`。 +- `greptimedb`:可选 GreptimeDB 写入。 +- `watch`:目录监控(由 `tui` 间接启用)。 + +## 3. 
仓库布局 + +``` +devbase/ +├── Cargo.toml # 主包 + workspace 定义(members = ["crates/*"]) +├── rustfmt.toml # 格式化配置 +├── mcp.json # MCP 客户端配置示例 +├── .github/workflows/ # CI(check/test/fmt/clippy/audit/invariant)+ Release +├── .githooks/pre-commit # 提交前 fmt + clippy +├── .cargo/config.toml # RUST_TEST_THREADS=1 +├── src/ +│ ├── main.rs # CLI 入口,仅做命令分发(833 行) +│ ├── lib.rs # 导出 30+ 模块,条件编译 mcp/tui/watch +│ ├── commands/ # CLI 子命令实现 +│ ├── core/ # 原子类型:Node / Edge / NodeType +│ ├── registry/ # SQLite Registry:schema、迁移、实体、关系、健康 +│ ├── repository/ # Git 仓库实体抽象 +│ ├── search/ # Tantivy 索引、混合检索(BM25 + 向量) +│ ├── semantic_index/ # 语义索引持久化 +│ ├── skill_runtime/ # Skill 发现、安装、执行、评分、发布 +│ ├── workflow/ # YAML 工作流:解析、校验、调度、执行 +│ ├── vault/ # PARA 笔记系统、双向链接、BFS 图、历史 +│ ├── mcp/ # MCP Server + 71 个 tools +│ ├── tui/ # ratatui 仪表盘(render/ + state/) +│ ├── sync/ # 仓库同步编排与策略 +│ ├── storage.rs # StorageBackend trait + AppContext(依赖注入容器) +│ ├── config.rs # 配置与凭证模板 +│ ├── i18n/ # 国际化(zh_cn / en) +│ └── ... +├── crates/ # 12 个独立 workspace crate +│ ├── devbase-core-types +│ ├── devbase-registry +│ ├── devbase-embedding +│ ├── devbase-skill-runtime-types +│ ├── devbase-skill-runtime-parser +│ ├── devbase-symbol-links +│ ├── devbase-sync-protocol +│ ├── devbase-syncthing-client +│ ├── devbase-vault-frontmatter +│ ├── devbase-vault-wikilink +│ ├── devbase-workflow-interpolate +│ └── devbase-workflow-model +├── tests/ +│ └── cli.rs # 11 个集成测试 +├── benches/ +│ ├── registry_bench.rs +│ └── semantic_index.rs +├── skills/ # 示例 Skill(embed-repo / knowledge-report / search-workspace) +├── scripts/ +│ ├── install.ps1 / install.sh +│ ├── devbase-claude.ps1 +│ └── invariant-checks/run-checks.ps1 +└── docs/ # 架构文档、ADR、RFC、指南 +``` + +### Workspace Crates 职责 + +| Crate | 说明 | +|-------|------| +| `devbase-core-types` | 统一实体模型 `Node`/`Edge`/`NodeType`,零内部耦合 | +| `devbase-registry` | SQLite Registry 操作;内部子模块:entity/health/metrics/call_graph/code_symbols/dead_code/relation/workspace | +| `devbase-embedding` | 
Embedding 生成协议;Candle/Ollama backend、`cosine_similarity` | +| `devbase-skill-runtime-types` | Skill Runtime 类型与枚举 | +| `devbase-skill-runtime-parser` | `SKILL.md` frontmatter 解析 | +| `devbase-symbol-links` | 代码符号链接生成(相似签名、共位关系) | +| `devbase-sync-protocol` | 目录同步协议与版本向量 | +| `devbase-syncthing-client` | Syncthing REST API 客户端 | +| `devbase-vault-frontmatter` | Vault 笔记 frontmatter 解析 | +| `devbase-vault-wikilink` | `[[wiki-link]]` / `[[note#anchor]]` 解析与解析 | +| `devbase-workflow-interpolate` | 工作流变量插值 | +| `devbase-workflow-model` | YAML Workflow 定义类型 | + +## 4. 构建、运行与测试 + +### 环境要求 + +- **Rust 1.95.0+** +- 主要开发/CI 平台:**Windows**(Linux/macOS 社区支持) +- 可选:`sccache` 可显著加速 tree-sitter grammar 的 C 编译(见 `CONTRIBUTING.md`) + +### 常用命令 + +```powershell +# 构建 +cargo build --release + +# 本地快速体验 +cargo run -- scan . --register +cargo run -- tui +cargo run -- mcp + +# 测试(与 CI 一致) +cargo test --all-targets +cargo test --workspace -- --test-threads=4 + +# 静态检查 +cargo clippy --all-targets -- -D warnings +cargo fmt --check + +# 审计 +cargo audit + +# 架构不变量检查(CI 的 invariant job) +scripts/invariant-checks/run-checks.ps1 +``` + +### 测试策略 + +- **单元测试**:分布在 `src/**/tests.rs` 与 `#[cfg(test)]` 块中。 +- **集成测试**:`tests/cli.rs`,使用 `assert_cmd` + `tempfile`,通过 `DEVBASE_DATA_DIR` 隔离数据目录。 +- **Crate 测试**:每个 `crates/*/src/*.rs` 自带测试。 +- **Bench**:`criterion` 驱动的 `benches/registry_bench.rs`、`benches/semantic_index.rs`。 +- **测试隔离**: + - 所有 IO 测试使用 `TempDir` 与 `StorageBackend` 注入,禁止直接写 `%LOCALAPPDATA%`。 + - `.cargo/config.toml` 默认 `RUST_TEST_THREADS=1`;CI 使用 `--test-threads=4`。 + - `git2` 测试必须显式 `Signature::now("Test", "test@example.com")` 与 `repo.set_head("refs/heads/main")`。 +- **网络相关测试**:`crates/devbase-embedding` 中 Candle 测试会下载模型,离线环境会失败;CI/在线环境需保证网络可达。 + +### 提交前必须通过 + +```powershell +cargo test --all-targets +cargo clippy --all-targets -- -D warnings +cargo fmt --check +``` + +仓库已配置 `.githooks/pre-commit` 执行 `cargo fmt --check` 与 `cargo clippy --all-targets -- -D warnings`。 + +## 5. 
代码风格与约定 + +### 格式化 + +`rustfmt.toml`: + +```toml +edition = "2024" +max_width = 100 +chain_width = 80 +fn_call_width = 80 +struct_lit_width = 30 +array_width = 80 +reorder_imports = true +``` + +### 提交规范(Conventional Commits) + +``` +feat: 新功能 +fix: Bug 修复 +docs: 文档更新 +refactor: 重构(无行为变更) +test: 测试相关 +chore: 构建/工具链 +perf: 性能优化 +``` + +示例: + +``` +feat(mcp): add devkit_skill_validate tool +``` + +### 源文件头 + +新增源文件应在顶部包含 SPDX 许可证头(项目主许可证为 AGPL-3.0-or-later): + +```rust +// SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (c) 2026 juice094 +``` + +> 注意:仓库内部分历史文件仍使用 `MIT` SPDX 头,新文件统一使用 AGPL。 + +### 工具使用约定 + +- **读文件**:优先使用 `Read` 工具;不要直接用 `cat`/`head`。 +- **搜索**:优先使用 `Grep`/`Glob`;不要直接用 shell `grep`/`find`。 +- **小修改**:使用 `Edit`(按原文件内容精确替换)。 +- **整文件/新建**:使用 `Write`。 +- **多文件操作/构建/测试**:使用 `Bash`。 + +### 添加 MCP Tool 的标准路径 + +1. 在 `src/mcp/tools/` 新建模块。 +2. 实现 `McpTool` trait(`name()`、`schema()`、`invoke()`,可选 `invoke_stream()`)。 +3. 在 `src/mcp/tools/mod.rs` 注册并 `pub use`。 +4. 在 `src/mcp/mod.rs` 的 `McpToolEnum` / 路由中加入该工具。 +5. 在 `src/mcp/tests.rs` 添加单元测试。 +6. 更新 `README.md` Tool 矩阵与 `AGENTS.md` 工具计数。 + +**核心原则**:所有状态变更操作必须幂等(`ON CONFLICT ... DO UPDATE`)。 + +## 6. 数据存储、Schema 与迁移 + +### 存储位置 + +默认使用用户本地数据目录(可通过 `DEVBASE_DATA_DIR` 覆盖): + +``` +%LOCALAPPDATA%/devbase/ # Windows +~/.local/share/devbase/ # Linux +~/Library/Application Support/devbase/ # macOS +``` + +目录内容: + +``` +devbase/ +├── registry.db # SQLite Registry(WAL 模式) +├── registry.db-wal +├── search_index/ # Tantivy 全文索引 +├── symbol_index/ # Tantivy 代码符号索引 +├── backups/ # 自动备份 +└── workspace/ + ├── vault/ # PARA 笔记(00-Inbox, 01-Projects, ...) + └── assets/ # 二进制资源 +``` + +### Schema 单一事实来源 + +- `src/registry/migrate.rs`:当前 Schema DDL + 迁移逻辑。 +- `src/registry/migrations/v*.rs`:v01 到 v36 的增量迁移脚本。 +- `src/registry/test_helpers.rs`:`SCHEMA_DDL` 必须与 `migrate.rs` 保持原子同步。 +- `CURRENT_SCHEMA_VERSION = 36`。 + +### Schema 迁移规范 + +1. 在 `migrate.rs` 新增版本判断块,使用 `ALTER TABLE ... ADD COLUMN`(SQLite 限制)。 +2. 
升级前必须调用 `backup::auto_backup_before_migration()` 生成 `backup-YYYYMMDD-HHMMSS.db`。 +3. 同步更新 `test_helpers.rs` 的 `SCHEMA_DDL`。 +4. 更新 `AGENTS.md` 的 Schema 版本号与 `CURRENT_SCHEMA_VERSION`。 + +**禁止**:直接修改现有表的列定义;不得在无迁移逻辑的情况下修改 registry schema。 + +## 7. 架构红线(Architecture Guardrails) + +违反任意一条 = **HALT**,转交人类裁决或回滚。完整清单与检测脚本见 `docs/architecture/invariants.md`。 + +### RF-1:依赖注入优于全局状态 + +- 禁止新增 `dirs::data_local_dir()` / `std::env::var_os` 硬编码路径。 +- 所有 IO 边界路径通过参数、构造函数或 `StorageBackend` trait 注入。 +- 例外(Grandfathered):`backup_dir`、`db_path`、`index_path` 在重构前不得新增第 4 处。 + +Fitness function: + +```bash +grep -rn "dirs::data_local_dir\|std::env::var_os\|std::env::var(\"LOCALAPPDATA\"" src/ \ + | grep -v "backup.rs\|migrate.rs\|search.rs" +# 预期输出:空 +``` + +### RF-2:测试密封性(Hermetic Testing) + +- 测试禁止修改全局进程状态(`std::env::set_var`、`static mut`、全局文件系统句柄)。 +- 文件系统测试使用 `tempfile` + `StorageBackend` 注入。 +- Tantivy / SQLite 文件系统测试必须串行化。 +- R2.1 禁止 `DEVBASE_DATA_DIR` 全局注入;R2.2 Windows 路径双端 `dunce::canonicalize`;R2.3 `git2` 测试显式身份与分支。 + +Fitness function: + +```bash +cargo test -- --test-threads=16 +``` + +### RF-3:Schema 单一事实来源 + +- `SCHEMA_DDL` 与 `migrate.rs` 必须原子同步。 +- CI 运行 `test_in_memory_schema_version` + schema 结构比对。 + +### RF-4:二进制入口限界 + +- `main.rs` 不得超过 1000 行;当前 833 行。 +- 新增 CLI 命令必须拆分到 `src/commands/` 子模块。 + +### RF-5:无循环依赖 + +- 禁止模块间双向 `use crate::` 引用。 + +### RF-6:生产代码无 panic + +- 生产代码禁止 `unwrap()` / `expect()` / `panic!()`(测试代码除外)。 +- 状态:当前生产代码 unwrap 计数为 0。 + +Fitness function: + +```bash +for f in $(find src -name "*.rs"); do + test_line=$(grep -n "#\[cfg(test)\]" "$f" | head -1 | cut -d: -f1) + if echo "$f" | grep -qE "tests?\.rs$|_test\.rs$|/tests/"; then continue; fi + if [ -n "$test_line" ]; then + head -n "$((test_line - 1))" "$f" | grep -n "\.unwrap()" + else + grep -n "\.unwrap()" "$f" + fi +done +# 预期输出:空 +``` + +### RF-7:Workspace 拆分约束 + +- 新增模块若对 devbase 内部其他模块的 `crate::` 引用超过 5 个,禁止提取为 workspace crate。 +- 已提取 crate 的重新导出文件(如 `src/symbol_links.rs`)顶部标有 `RE-EXPORT 
ONLY`,禁止添加新代码。 +- 子 crate 依赖版本必须与 workspace 统一。 + +### 关键分层不变量(G/T) + +| 编号 | 规则 | +|------|------| +| G1 | `registry::WorkspaceRegistry` 不得依赖 Tier 4+ 模块 | +| G3 | 所有状态变更 MCP tool 必须幂等 | +| G4 | Breaking change 只能通过新增 tool 实现,不修改现有 schema | +| G5 | 生产代码不得新增 `unwrap`/`expect`(RF-6) | +| T11 | `mcp/tools/*` 不得直接调用 `rusqlite::Connection`,必须通过 registry 封装(已知例外:`repo.rs`、`brief.rs`、`impact.rs`) | +| T12 | `tui/render/*` 是纯消费者层,禁止写入 registry | + +CI 通过 `scripts/invariant-checks/run-checks.ps1` 检测 G5 / T11 / T12 / README+Cargo.toml 完整性。 + +## 8. 安全与隐私原则 + +### 本地优先(Local-First) + +- Registry DB 只存在用户本地配置目录,**不向远程传输**。 +- 代码内容默认不上云(除非用户显式配置 GitHub token 用于 stars 查询)。 +- MCP Server 仅通过 **stdio** 本地进程通信,不暴露网络端口。 + +### 客户端无关(Client-Agnostic) + +- 允许:向通用目录输出数据;实现标准协议(MCP)。 +- 禁止:核心能力硬编码特定客户端路径/API/配置;核心能力可用性依赖某个客户端是否安装。 +- `scripts/claude/`、`docs/clients/` 属于适配示例,不归入核心版本控制。 + +### 凭证管理 + +- GitHub token、LLM API key 存储在本地 `config.toml`(用户配置目录,**不在项目工作目录**)。 +- 模板使用占位符 ``,禁止在源码/注释/测试数据中硬编码真实凭证。 +- `.gitignore` 已覆盖 `*.db`、`.devbase/`、`.env*`、`*.local.toml`。 + +### 审计与备份 + +- 所有 `scan`/`sync`/`health` 操作自动写入 OpLog(SQLite `oplog` 表)。 +- Schema 迁移前自动生成 `backup-YYYYMMDD-HHMMSS.db` 快照。 +- Registry 支持 `export`/`import` 用于用户自主备份。 + +## 9. 
CLI / MCP / TUI 能力速览 + +### 顶层 CLI 命令 + +| 分组 | 命令 | +|------|------| +| 仓库管理 | `scan`、`health`、`status`、`sync`、`query`、`index`、`tag`、`meta`、`repo` | +| 代码分析 | `metrics`、`module-graph`、`call-graph`、`dependency-graph`、`code-symbols`、`dead-code`、`github-info` | +| 知识/Vault | `digest`、`knowledge-report`、`oplog`、`vault`、`ontology` | +| Skill / Workflow | `skill`、`workflow` | +| 系统 | `tui`(feature `tui`)、`mcp`(feature `mcp`)、`daemon`、`watch`(feature `watch`)、`syncthing-push`、`skill-sync`、`limit`、`registry`、`clean`、`version` | + +### MCP Server + +- 启动:`devbase mcp` +- 传输:**stdio only** +- 工具示例:`devkit_scan`、`devkit_health`、`devkit_sync`、`devkit_query`、`devkit_index`、`devkit_vault_search`、`devkit_skill_run`、`devkit_workflow_run`、`devkit_session_recall`、`devkit_project_brief` 等共 71 个。 +- 客户端配置示例见 `mcp.json`: + +```json +{ + "mcpServers": { + "devbase": { "command": "devbase", "args": ["mcp"] } + } +} +``` + +### TUI + +- 启动:`devbase tui` +- 基于 `ratatui` 的异步事件循环,支持跨仓库导航、安全同步预览、标签聚类、搜索。 + +## 10. CI/CD 与发布 + +### CI(`.github/workflows/ci.yml`) + +在 Windows runner 上执行: + +1. `cargo check --all-targets` +2. `cargo test --lib --tests --bins --examples -- --test-threads=4 --nocapture` +3. `cargo fmt --check` +4. `cargo clippy --all-targets --verbose -- -W warnings` +5. `cargo audit` +6. `scripts/invariant-checks/run-checks.ps1` + +### Release(`.github/workflows/release.yml`) + +- 触发:推送 `v*` tag。 +- 构建 Windows x64 zip 与 Linux x64 tar.gz,附带 `README.md`、`LICENSE`、`CHANGELOG.md`。 +- 上传至 GitHub Release。 + +### 安装脚本 + +- Windows:`scripts/install.ps1` +- Linux/macOS:`scripts/install.sh` +- Claude Code 启动器:`scripts/devbase-claude.ps1` + +## 11. 
Skill 与工作流 + +### Skill + +- 元数据:目录下的 `SKILL.md`,frontmatter 必须包含 `id`、`name`、`version`、`description`,可选 `dependencies`。 +- 入口脚本支持 `py`、`sh`、`ps1`、`js` 或二进制。 +- 命令:`skill discover`、`skill run`、`skill install`、`skill publish`、`skill sync`。 +- 评分:`success_rate`、`usage_count`、`rating`(0-5)。 + +### Workflow + +- YAML 定义,5 种 step 类型:`skill`、`subworkflow`、`parallel`、`condition`、`loop`。 +- 拓扑调度 + batch 并行执行。 +- 规范见 `docs/architecture/workflow-dsl.md`。 + +## 12. 禁止事项 + +- 不得修改 `dev/third_party/*` 外部仓库。 +- 不得在没有迁移逻辑的情况下修改 registry schema。 +- 不得引入已 deprecated 的协议。 +- **不得在主仓库引入 Spark/Flink 依赖**(研究性质代码必须置于独立仓库)。 +- **不得在任何源码文件中硬编码真实 token、api_key 或密码**(包括注释和测试数据)。 + +## 13. 参考文档 + +| 文档 | 内容 | +|------|------| +| `README.md` | 项目简介、快速开始、技术栈 | +| `CONTRIBUTING.md` | 贡献指南、构建加速、代码规范、Skill/MCP 添加路径 | +| `docs/architecture/overview.md` | 三层架构、技术决策记录 | +| `docs/architecture/invariants.md` | 完整不变量清单(G/T) | +| `docs/architecture/workflow-dsl.md` | Workflow DSL 规范 | +| `docs/architecture/workspace-as-schema.md` | 统一实体模型 | +| `docs/guides/mcp-integration-guide.md` | MCP 集成指南 | +| `docs/README.md` | 完整文档导航 | +| `docs/ROADMAP.md` | 历史 Waves、功能路线图与讨论 | +| `CHANGELOG.md` | 版本变更日志 | + +--- + +*本文件应随项目结构、Schema 版本、工具数量、测试数量等变更同步更新。* diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f26b8d..107b94f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,694 +1,694 @@ -# Changelog - -All notable changes to this project will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
- -## [Unreleased] - -### Added - -- **FTS5 技能全文搜索** (Schema v35) — `skills_fts` 虚拟表 + 触发器,`search_skills_text()` 使用 BM25 排序(name=1.0, desc=0.8, tags=0.4, category=0.2),LIKE 降级 fallback -- **可插拔外部技能源** (Schema v36) — `SkillSource` trait + `GitHubSource` / `LocalFileSource`,`sync_sources` / `sync_log` 审计表,`devkit_skill_sync` MCP 工具(Beta tier),`devbase skill import` CLI 子命令 -- **Vault 多根目录 + symlink 跟随** — `VaultConfig`(roots + follow_symlinks),多根目录扫描 `scan_vault_with_options`,`resolve_vault_write_path` 实体回溯路径解析,`devbase vault sync` CLI -- **Ontology 导入** — `devkit_ontology_import` MCP 工具(Beta tier),`devbase ontology` CLI(`--dry-run` 预览),支持 OpenClaw workspace `ontology/entities/*.json` + `ontology/relations/*.jsonl` 批量导入 -- MCP 工具数: 69 → **71**(5 stable + 62 beta + 4 experimental) -- `devkit_document_convert` — Experimental tier MCP tool,PDF/PPTX → Markdown 转换(`pdftotext` / `python-pptx` 流水线),含 frontmatter 质量标注 -- Stable 工具 invocation 测试补全:`devkit_query_repos`、`devkit_vault_search`、`devkit_vault_read`、`devkit_status`、`devkit_workflow_list`、`devkit_index` -- `seed_repo()` 轻量测试 helper(仅插入 `entities` 表,无副作用) - -### Fixed - -- `mcp/tools/document_convert.rs` 原始字符串定界符修复(`r###"` 避免与 Python f-string `"##` 冲突) -- `cleanup_extracted_text` 单元测试期望值与实现语义对齐(保留最多 2 个连续空行) - -### Changed - -- **Workspace crate 架构重组** — 消除机械提取造成的微 crate 碎片 - - 合并 8 个 `devbase-registry-*` 微 crate(100–300 行/个)为统一 `devbase-registry`,含 8 个语义子模块(`entity`, `health`, `metrics`, `relation`, `call_graph`, `code_symbols`, `dead_code`, `workspace`) - - 拆分 10+ 个 monolithic `lib.rs` 为域驱动子模块:`devbase-embedding` (`candle`/`ollama`), `devbase-workflow-model` (`definition`/`execution`/`step_type`), `devbase-symbol-links` (`similarity`/`co_located`), `devbase-sync-protocol` (`index`/`version_vector`), `devbase-skill-runtime-types` (`skill_type`/`execution`/`params`), `devbase-skill-runtime-parser` (`frontmatter`/`field_parsers`), `devbase-workflow-interpolate` (`resolver`), `devbase-vault-frontmatter` (`parser`), 
`devbase-vault-wikilink` (`parser`), `devbase-core-types` (`node_type`/`node`/`edge`) - - 全 workspace `Cargo.toml` 统一使用 `[workspace.package]` 继承(`version`, `edition`, `authors`, `license`, `repository`) -- `KNOWN_ISSUES.md` 更新:document_convert 从 P3 债务移至已解决归档;测试计数 485→494 -- `docs/reference/mcp-tools.md` 修正为 69 个工具,补充 Index / Workflow / Relation / KnownLimit / Session 分类 -- `docs/reference/stable-tools/README.md` 修正为 5 个 Stable 工具(删除过时的 `project_brief.md` / `hybrid_search.md` / `session_recall.md`) - -## [0.20.1] - 2026-05-17 - -### Added - -- **Phase 1 Production Hardening** - - Workflow E2E 测试 — `src/mcp/tools/workflow.rs`:DAG 成功执行、失败传播验证 - - RF-7 路径隐私脱敏 — `sanitize_path()` 自动掩码 home 目录为 `~` - - Tantivy 一致性修复 — `repair_tantivy_consistency_at()` 启动时自动检测 orphan/missing 文档 - - 性能回归基线 — `test_keyword_search_latency_regression_1k` / `_10k`(profile-aware 阈值) - - `TempStorageBackend` — 测试隔离后端,消除 `DEVBASE_DATA_DIR` 竞态 -- **Architecture Invariants CI 自动化** — `scripts/invariant-checks/run-checks.ps1` - - G5 (RF-6):diff-only 检测新增生产代码 `unwrap`/`expect`/`panic`(排除 `#[cfg(test)]`) - - T11:`mcp/tools` 禁止直接调用 `rusqlite::Connection` - - T12:`tui/render` 纯消费检查(禁止写入操作) - -### Fixed - -- `AppContext::with_storage()` 使用实际 storage backend 的 `index_path()` 而非硬编码默认值 -- G5 invariant checker 正则修复:`tests.rs` 文件正确跳过 -- `Cargo.lock` 同步版本 bump(修复 `--locked` release 构建失败) -- 平台相关测试隔离:`C:\` 路径断言加 `#[cfg(windows)]`,Linux `python3` 断言适配 -- HuggingFace 网络依赖测试加 `#[ignore]`(避免 CI TLS 证书失败) - -## [0.20.0] - 2026-05-16 - -### Added - -- **知识完备性**:Vault 双向链接图遍历(BFS depth 1-3)+ `[[note#heading]]` block 引用 -- **Vault 笔记历史追踪** — Git-based blob diff,`devkit_vault_history` tool -- **混合检索质量监控** — `HybridSearchMetrics`(latency/recall/overlap/keyword_source) -- **性能回归基线** — Criterion benchmarks:`index_repo_full`、`cosine_similarity`、`extract_symbols` -- **客户端无关原则** — `StorageBackend` trait 完整实现,解耦 `dirs::data_local_dir()` 硬编码 -- **MCP Tools +4** (68 total) - - `devkit_vault_history`, `devkit_vault_export`, 
`devkit_vault_graph`, `devkit_vault_daily` - -### Changed - -- 20+ 独立 crate 零循环依赖,workspace 拆分完成 -- `entities` 表成为唯一真相源,`repos` 表彻底删除 -- Tantivy / SQLite 补偿扫描:启动时自动同步 orphan 文档 - -## [0.19.0] - 2026-05-14 - -### Added - -- **SQLite WAL 模式** — `r2d2` 连接池 + WAL journal,并发安全与增量备份 -- **Tantivy 健康评分** — `devkit_index_health`:损坏检测、自动重建、孤儿文档清理 -- **Vault 导出** — `devkit_vault_export`:Obsidian-compatible Markdown 批量导出 -- **Redis ADR 决策** — `docs/architecture/adr-003-redis.md`:评估后决定保持 SQLite 优先 -- **OpLog 审计追踪** — 结构化事件类型 `OplogEventType`,全操作不可变日志 - -### Changed - -- Schema 迁移前自动生成 `backup-YYYYMMDD-HHMMSS.db` 快照 -- 索引层反向一致性扫描与自动修复能力 - -## [0.18.0] - 2026-05-13 - -### Added - -- **ClaudeCode 工作流集成** — `docs/RFC/claudecode-workflow-integration.md` - - `devkit_project_brief` — 生成项目 Markdown 简报(架构 + 模块 + 近期提交 + 已知约束),用于 `.claude/CLAUDE.md` 注入 - - `devkit_impact_analysis` — 符号级变更影响半径分析(BFS 调用图遍历 + 相关符号发现 + 测试启发式 + 历史 oplog) - - `scripts/devbase-claude.ps1` — PowerShell 一键启动器:自动检测 repo → 生成简报 → 注入 `.claude/CLAUDE.md` → 启动 `claude` → 可选捕获退出 diff -- **Session 导入/导出工具** - - `devkit_session_export` — 导出会话为 Markdown / JSON;支持记忆类型图标与元数据 - - `devkit_session_import` — 从 bulk text 批量导入记忆(`[type]` 前缀解析) -- **MCP Tools +4** (64 total) - - `devkit_project_brief`, `devkit_impact_analysis`, `devkit_session_export`, `devkit_session_import` -- **TUI Session 视图硬化** - - 三态 MainView 切换:`RepoList → VaultList → Session`(`Tab` 键循环) - - Session 列表:状态图标(● active / ◌ archived)+ 高亮样式 - - Session 详情:记忆类型图标(◆ decision / ▪ constraint / ★ discovery / ✗ error)+ embedding model 标签 + indexed 状态 -- **AGENTS.md** 同步至 v0.18.0-dev 基线(64 Tools / 437 tests) - -### Changed - -- `src/mcp/mod.rs` Tool 注册表扩展至 64 工具(稳定 + Beta) -- `src/mcp/tests.rs` 工具计数断言同步 -- TUI `render_session.rs` / `state/mod.rs` 适配 Schema v34 记忆字段(`embedding_model`, `indexed_at`) - -## [0.17.0] - 2026-05-13 - -### Added - -- **Agent Memory 向量存储** — Schema v34 - - `agent_memories` 新增 `embedding BLOB`, `embedding_model TEXT`, `indexed_at DATETIME` - - 
Partial index `idx_agent_memories_embedding` 仅索引含向量的行 - - `AgentMemory` 结构体扩展向量元数据字段 -- **SQLite UDF: `cosine_similarity`** — `src/registry/agent_context.rs` - - 输入: 两个 little-endian f32 BLOB - - 输出: REAL ∈ [-1.0, 1.0] - - 注册时机: `WorkspaceRegistry::init_db_at` 迁移完成后自动注册 -- **语义记忆搜索** — `search_memories_semantic(context_id, query_embedding, limit)` - - 纯 SQL `ORDER BY cosine_similarity(embedding, ?) DESC` - - 零 LLM 运行时依赖;仅执行向量比对 -- **MCP Tools +2** (60 total) - - `devkit_session_recall` — 外部向量查询 + 语义召回 top-k memories - - `devkit_session_index` — 为已有 memory 注入外部生成 embedding -- **Skill Runtime Auto-Recall** — `src/skill_runtime/executor.rs` - - Tier 1: Semantic recall (本地 Candle/Ollama 或外部 HTTP endpoint) - - Tier 2: Keyword fallback (`LIKE` search on `content`) - - 新环境变量: `DEVBASE_CONTEXT_MEMORY_COUNT`, `DEVBASE_CONTEXT_RECALL_METHOD` - - `DEVBASE_CONTEXT_MEMORIES` 升级为 top-k 相关 memories(含 `score` + `model`) -- **外部 Embedding Provider 集成** - - `call_external_embedding_endpoint` — `reqwest::blocking` POST `/api/embeddings` - - 配置驱动: `config.toml [embedding]` (enabled/provider/model/base_url/timeout) - - 端到端测试: mock TCP server 验证 Ollama 格式解析 + 错误码处理 -- **RFC 文档** — `docs/RFC/agent-memory-vector-storage.md` - - 架构决策: devbase = 向量数据库层,不做 embedding 生成 - - 参照 pgvector 边界设计 - -### Changed - -- **Feature Flags**: `embedding` 从 `default` 移除 - - Candle/Ollama 依赖变为 opt-in: `--features embedding` - - 默认构建零 ML 依赖,编译时间减少 30~50% -- `insert_memory` 签名扩展: 新增可选 `embedding: Option<&[f32]>` 和 `embedding_model: Option<&str>` -- `list_memories` / `search_memories` SELECT 语句扩展为 8 列(兼容新增字段) -- AGENTS.md 同步至 v0.17.0-dev 基线 - -### Breaking Changes - -- 默认构建不再包含 `devbase-embedding` crate;需要语义生成能力的用户须显式启用 `--features embedding` -- `generate_query_embedding` 在默认构建下返回错误(提示启用 feature 或配置外部 endpoint) - -## [0.16.1] - 2026-05-13 - -### Added - -- **Workflow-Session Binding** — Schema v33 - - `workflow_executions` 新增 `context_id` 列 + 索引 - - `create_execution` 自动绑定 `resolve_active_context()` - - MCP 
`devkit_workflow_run` 与 CLI `workflow run` 均支持自动绑定 - - `devkit_session_workflows` tool: 列出指定 context 的 workflow 执行历史 -- `context_entity_links` 表 (Schema v32): context 与任意 entity 的多对多关联 - -## [0.16.0] - 2026-05-13 - -### Added - -- **Agent Contexts (AI Agent OS)** — Schema v31 - - `agent_contexts` 表: 持久化 AI session / project scope - - `agent_memories` 表: 结构化记忆(decision/constraint/note/discovery/error) - - 9 个 Session MCP tools: save/list/resume/attach/detach/activate/search/capture/workflows - - `resolve_active_context()`: 环境变量 `DEVBASE_ACTIVE_CONTEXT` → 文件 `.active_context` fallback - - Context-aware Skill Runtime: 注入 `DEVBASE_ACTIVE_CONTEXT` + `DEVBASE_CONTEXT_MEMORIES` + `DEVBASE_CONTEXT_LINKS` - - 所有 agent_context 操作自动写入 OpLog (`OplogEventType::AgentContext`) - -## [0.15.0] - 2026-05-04 - -### Added - -- **P1: Tantivy BM25 代码符号搜索** — `search/symbol_index.rs` - - 独立 Schema (`repo_id`, `name`, `signature`, `file_path`, `line_start`) - - `keyword_search_symbols` 主路径走 Tantivy BM25,SQLite LIKE 回退 - - 索引流程 `index.rs` 自动同步写入 symbol_index - - `StorageBackend` 扩展 `symbol_index_path()`(6 实现) -- **P3: Embedding 多后端** — Candle (默认) + Ollama (配置切换) - - 新增 `OllamaProvider` (`ureq` HTTP `/api/embed`) - - `create_provider(backend, model, base_url, timeout)` 配置化创建 - - `generate_query_embedding` 通过 `OnceLock` 懒加载配置化 provider - - 默认模型改为 `all-minilm` (384-dim,与 Candle 维度兼容) -- **P4: Health 环境检测扩展** — `EnvVersionCache` 从 5 工具 → 9 工具 - - 新增: `python`, `bun`, `zig`, `java` - - `get_tool_version` 支持 stderr fallback (Java 输出到 stderr) - - `fmt_version` 改进: Java 引号提取、Docker/Python 格式处理 -- **P5: 架构不变量自动化 CI** — `scripts/invariant-checks/run-checks.ps1` - - G5: diff-only 检测新增生产代码 unwrap/expect/panic(排除 `#[cfg(test)]`) - - T11: 检测 `mcp/tools/*` 直接调用 `rusqlite::Connection` - - T12: 检测 `tui/render/*` 写入操作 - - CI job `invariant-check` 加入 `.github/workflows/ci.yml` -- **P2 Phase 1: AppContext 职责拆分** — 6 个 Client trait impl 迁出 `storage.rs` - - `scan.rs` / `health.rs` / `sync.rs` / `digest.rs` / 
`knowledge_engine/mod.rs` / `registry.rs` - - `storage.rs` 860 → 430 行 (-50%) - - 删除冗余 `conn_mut()` -- **P2 Phase 2: 内联 SQL 下沉** — 新增 `registry/code_symbols.rs` + `registry/dead_code.rs` - - `CodeSymbolRow` / `DeadCodeRow` + 纯函数查询 (12 个单元测试) - - `RegistryClient` 退化为纯代理层 - -### Changed - -- `EmbeddingConfig` 默认模型 `nomic-embed-text` → `all-minilm` (384-dim) -- AGENTS.md 阶段描述更新: v0.14.3 → v0.15.0 推进中 → v0.15.0 全部完成 - -### Fixed - -- **TTL 缓存负值 bug** (`97172ec`): `elapsed < ttl_seconds` → `elapsed >= 0 && elapsed < ttl_seconds` - - 防止系统时间回溯导致缓存永不过期 -- `crates/devbase-embedding/src/lib.rs` 遗留 unwrap 清零 (`encode_with_candle` → `ok_or`) - -## [0.14.3] - 2026-05-05 - -### Added - -- **Schema v30** — `code_symbols.attributes` 列,tree-sitter 提取 `#[test]`/`#[tokio::test]` 等属性 - - `devkit_dead_code` 自动过滤测试函数,消除假阳性 - - `rust_node_to_symbol` 支持 `prev_sibling()` 回溯收集属性节点 -- **Tantivy/SQLite 补偿扫描** — 启动时自动检测并清理 orphan 文档 - - 新增 `search::sync_index_to_db(conn)`,对比 Tantivy `list_indexed_repo_ids()` 与 SQLite `entities` - - `AppContext` 初始化后自动调用,失败仅 warn 不阻塞启动 -- **Feature flags** — `mcp` + `embedding`,支持 `--no-default-features` 最小化编译 - - `default = ["tui", "mcp", "embedding"]` - - `devbase-embedding` 设为 `optional = true` - - 新增 `src/clients.rs` 提取 MCP client traits,避免 mcp feature 关闭时 trait 不可用 -- **Kimi CLI MCP 集成文档** — AGENTS.md 新增 Kimi CLI 集成状态,项目级 skill 位于 `.kimi/skills/devbase-project/` - -### Changed - -- **RF-1 架构红线** — `init_db()` 全局路径残留清零 - - `init_db()` 标记 `#[deprecated]`,新增 `init_db_with(backend: &dyn StorageBackend)` - - `workflow/executor.rs`、`workflow/state.rs`、`storage.rs` 全部改为注入式 - - `examples/` + `benches/` 中额外 5 处残留同步修复 -- `index_repo_full` 合并用户 `scan.exclude_patterns` 与默认排除模式 -- `cargo fmt` + `cargo clippy --fix` 全量格式化(8 文件,6 处 warning 修复) -- `CONTRIBUTING.md` 新增 sccache 构建加速指南 - -### Fixed - -- `cargo clippy --all-targets -D warnings` — 7 warnings → 0 -- `cargo fmt --check` — 全量通过 - -## [0.14.2] - 2026-05-02 - -### Changed - -- health dirty 检测修复(排除 ignored 文件) -- 
scan 路径规范化 + syncthing-rust 识别修复 -- experiment_log / CodeMetrics / ModuleGraph / CallGraph / DeadCode 提升为 Beta tier -- 48 tools: Stable 5 / Beta 40 / Experimental 3 - -## [0.14.1] - 2026-05-01 - -### Added - -- CLI JSON 输出补全 (`--json` / `--recalc`) -- relations MCP 工具加固 -- License headers 全量补录 -- Vault Daily / Vault Graph MCP tools - -## [0.14.0] - 2026-04-28 - -### Added - -- Workspace 拆分:6 个零耦合 crate 提取 -- MCP trait 化:`mcp/tools/repo.rs` `crate::` 引用 68→41 - -## [0.13.0] - 2026-04-26 - -### Added - -- Registry God Object 拆解:10 子模块提取为 free function -- `WorkspaceRegistry` 退化为纯 facade - -## [0.12.0] - 2026-04-30 - -### Added - -- **Schema v22** — drop `vault_notes`, `papers`, `workflows` orphan tables; `entities` becomes sole source of truth for all entity types -- **Managed-Gate Fail-Safe Defaults** — `devbase sync` defaults to managed repos only - - Management tags: `mirror`, `reference`, `third-party`, `collaborative`, `team`, `own-project`, `tool`, `active`, `managed` - - Untagged / non-management repos are registered but skipped by default sync - - `--filter-tags` bypasses the gate for explicit selection -- **`.devbase-ignore`** — directory-level opt-out exclusion during scan -- `scan --register` no longer auto-tags repos with `"discovered"` -- i18n hint for unmanaged repos - -### Changed - -- `inspect_repo`: remove `"discovered"` from default tags; `-main`/`-master` repos keep `zip-snapshot` + `needs-migration` -- `collect_tasks`: default mode filters by management tags -- All `list_workflows` / `list_papers` / `list_vault_notes` queries migrated to `entities` table + `json_extract` -- Generic `upsert_entity` abstraction for entity dual-write -- `ENTITY_TYPE_*` constants extracted across 10 files (~25 replacements) -- `cargo test --lib`: 374 → 379 passed - -### Breaking Changes - -- Existing repos tagged `"discovered"` are **no longer synced by default**. - Use `devbase tag managed` (or any management tag) to opt a repo into automatic sync. 
- -## [0.10.0] - 2026-04-26 - -### Added - -- **L3 Risk Layer MVP** — `known_limits` 表 + Registry CRUD + MCP tools + CLI subcommand - - Schema v18: `known_limits` 表(id, category, description, source, severity, first_seen_at, last_checked_at, mitigated) - - Registry CRUD: `save`/`get`/`list`/`delete`/`resolve`/`seed_hard_vetoes` - - MCP tools: `devkit_known_limit_store` / `devkit_known_limit_list`(Beta tier) - - CLI: `devbase limit {add,list,resolve,delete,seed}` - - OpLog 集成: create/update/resolve/delete/seed 自动写入 oplog(event_type = `KnownLimit`) - - Hard Veto 种子: AGENTS.md 中的 5 条硬约束自动填充 -- **L4 元认知层 MVP** — `knowledge_meta` 表 + L3-L4 联动 - - Schema v19: `knowledge_meta` 表(id, target_level, target_id, correction_type, correction_json, confidence, created_at) - - Registry CRUD: `save`/`get`/`list`/`delete` - - CLI 联动: `devbase limit resolve --reason "..."` 自动创建 L4 meta 记录 -- **Hard Veto 运行时守卫** — Skill 执行前自动检查未解决 hard veto - - `skill_runtime::executor::run_skill` 执行前查询 `known_limits` - - 未解决 hard veto 存在时,警告注入 `stderr`,同时写入 OpLog - - 零破坏性:skill 仍执行成功,但输出中包含 `[HARD-VETO-WARNING]` - -### Changed - -- `cargo test --all-targets`: 279 → 288 passed -- MCP tool 总数: 35 → 37 - -## [0.11.3] - 2026-04-26 - -### Changed - -- **Phase 1 主从表切换 — Stage 3 完成**(`repos` 表删除) - - `save_repo` / `update_repo_*` / `run_clean` 不再写入 `repos` - - Schema v21 迁移:重建 11 个子表(去 FK)→ 删除 `repos` 表 - - `test_helpers.rs` SCHEMA_DDL 同步去 `repos` + 去 FK - - `entities` 成为真正的读写唯一数据源 - -## [0.11.2] - 2026-04-26 - -### Changed - -- **Phase 1 主从表切换 — Stage 2 完成**(读路径迁移) - - `list_repos` / `list_repos_stale_health` / `list_repos_need_index` / `list_workspaces_by_tier` 全部改为从 `entities` 读取(`json_extract`) - - `digest.rs` / `health.rs` / `daemon.rs` / `backup.rs` / `knowledge_engine.rs` / `sync/*.rs` / `tui/state.rs` / `mcp/tools/repo.rs` 等所有 `list_repos()` 调用方自动迁移 - - 直接 SQL 查询迁移:`dependency_graph.rs`, `registry/links.rs`, `registry/knowledge.rs`, `query.rs`, `oplog_analytics.rs`, `commands/simple.rs` - - 
`update_entity_metadata_field` 修复 `json_set` 字符串引号问题:原始字符串直接传递,`"null"` 时自动 `json_remove` - - `repo_tags` / `repo_remotes` 子表保留,通过 `repo_id` JOIN 读取(FK 仍指向 `repos`) - -## [0.11.1] - 2026-04-26 - -### Changed - -- **Phase 1 主从表切换 — Stage 0 完成**(entities 第一公民前置) - - Schema v20: Flat ID 命名空间迁移(`repo:devbase` → `devbase`,`skill:xxx` → `xxx`) - - `sync_repo_to_entities_by_id` 重构为 `upsert_entity_for_repo`:直接由 `RepoEntry` 写入 entities,不再读取 repos - - `update_repo_*` 改为先写 entities metadata(`json_set`),再写 repos - - `save_repo` 写入顺序反转:entities → repos → repo_tags → repo_remotes - - `run_tag` 补全 entities 双写:`sync_repo_tags_to_entity` - - `run_clean` 改为先删 entities,再删 repos(保留 CASCADE 行为) - - Skill entities 同步同理去除 `skill:` 前缀 - -## [0.11.0] - 2026-04-26 - -### Added - -- **AppContext Pool 化** — 全链路数据库连接池统一 - - `AppContext` 持 `r2d2::Pool`,替代单 `Connection` - - `scan`/`health`/`sync`/`backup`/`daemon`/`query` 等深层模块全部迁移 - - `init_db()` 调用点从 89 处降至 5 处合法保留(Pool 前 schema 引导 ×2、migrate 定义 ×1、workflow 测试辅助 ×2) - - 根治 `spawn_blocking` / `thread::spawn` 闭包无法传递裸 `Connection` 的问题 -- **MCP 测试隔离** — 全部 MCP 集成测试改用临时目录 - - `DEVBASE_DATA_DIR` 指向 `tempfile::TempDir` + `AppContext::with_defaults()` - - 多线程并发测试全部通过,无 flaky -- **Search 测试竞态自愈** — `SEARCH_TEST_LOCK` + 临时目录隔离,多线程 (`--test-threads=4`) 稳定通过 - -### Changed - -- `cargo test --all-targets`: 288 → 374 passed(+86 个新增/迁移测试) -- CI 测试并行度: `--test-threads=1` → `--test-threads=4`,回归测试耗时 ~13s → ~4s -- `rusqlite` 0.34 + `r2d2_sqlite` 0.27.0 版本锁定 - -## [0.9.0] - 2026-04-26 - -### Added - -- **Workflow Loop Step 完整执行** — 5 种 step 类型全部可执行 - - `StepType::Loop { for_each, body }`:遍历集合,执行 body 子步骤 - - 变量插值:`${loop.item}` / `${loop.index}` - - 结果聚合:stdout 按迭代索引标记,outputs 合并 - - 失败处理:单迭代失败按 body step 的 `on_error` 策略处理 -- **12 个新增单元测试** — model/interpolate/validator/executor 全覆盖 - -### Changed - -- `cargo test --all-targets`:267 → 279 passed - -## [0.8.0] - 2026-04-25 - -### Added - -- **Workflow 子类型执行** — Subworkflow / Parallel / Condition 全部可执行 - - 
`execute_subworkflow_step`:递归调用 `execute_workflow` - - `execute_parallel_step`:子步骤串行执行 + 结果聚合 - - `execute_condition_step`:字符串插值后 true/false 评估 -- **NLQ 自然语言查询结果可执行** — TUI `[:]` 搜索结果按 Enter 直接运行 skill -- **NLQ smoke test** — `run_nlp_selected_skill` 空列表/无技能/执行管道测试 -- **TUI SkillPanel 拆分** — `SkillPanelState` 提取 7 个字段,App 51→44 字段 - -### Fixed - -- 29 个生产代码 unwrap 全部清零 -- 30 个 clippy 警告清零 - -## [0.7.0] - 2026-04-20 - -### Added - -- **NLQ 自然语言查询** — TUI `[:]` 触发 embedding 语义搜索,fallback 降级文本搜索 -- **智能同步建议** — `sync/policy.rs::recommend_sync_action` 基于 safety/ahead/behind 生成建议 - -## [0.6.0] - 2026-04-18 - -### Added - -- **Mind Market 评分系统** — `skill_runtime::scoring` - - `success_rate` + `usage_count` + `rating`(0-5 分公式) - - CLI:`skill recalc-scores` / `skill top` / `skill recommend` -- **TUI Workflow 执行** — `[w]` 详情页 `r/Enter` 运行 + 结果弹窗 - -## [0.5.0] - 2026-04-17 - -### Added - -- **Workflow Engine v0.5.0** — YAML 编排多步骤自动化 - - 5 种 step 类型:skill / subworkflow / parallel / condition / loop - - 拓扑调度(Kahn 算法)+ batch 并行执行 - - 变量插值:`${inputs.x}` / `${steps.y.outputs.z}` - - 错误策略:Fail / Continue / Retry / Fallback - - Schema v17:`workflows` + `workflow_executions` 表 -- **CLI/TUI Workflow 集成** — `devbase workflow {list,show,register,run,delete}` + `[w]` 面板 - -## [0.4.0] - 2026-04-15 - -### Added - -- **Schema v16 统一实体模型** — `entity_types` + `entities` + `relations` 表,渐进双写 -- **Skill 自动封装** — `devbase skill discover ` 自动分析项目 CLI/API,生成 SKILL.md -- **Git URL Discover** — `devbase skill discover https://github.com/...` 克隆+分析+注册 -- **MCP `devkit_skill_discover`** — 35 tools 总数 - -## [0.3.0] - 2026-04-12 - -### Added - -- **34 MCP tools 全量通过 MCP Inspector** -- **README Quick Start 三步内跑通** -- **CI/CD** — `.github/workflows/ci.yml`(check / test / fmt / clippy on Windows) -- **GitHub Release 预编译二进制** - -## [0.2.4] - 2026-04-20 - -### Architecture - -- **Outboard Brain Embedding Architecture** — Embedding generation moved to external Skill/MCP Server - - `embedding.rs` stripped of 
Ollama/OpenAI generation logic; storage protocol only (`embedding_to_bytes`, `bytes_to_embedding`, `cosine_similarity`) - - `knowledge_engine.rs` no longer generates embeddings during indexing - - Aligns with "store + search in devbase, compute in Clarity/Skill" boundary - -### Changed - -- **Breaking** — `devkit_semantic_search` now accepts `query_embedding: number[]` instead of `query: string` - - Embedding generation is the caller's responsibility (external MCP Server or Skill) - - Removed `config.embedding.enabled` gate; search works as long as embeddings exist in DB - -### Added - -- **`devkit_embedding_store`** — Store externally-generated embedding vectors into SQLite - - Parameters: `repo_id`, `symbol_name`, `embedding: number[]` - - Upsert semantics (ON CONFLICT UPDATE) -- **`devkit_embedding_search`** — Alias for `devkit_semantic_search` with vector-based interface - - Same parameters and behavior, alternative name for workflow clarity -- **MCP tool count**: 25 → 31 - -## [0.2.4] - 2026-04-20 (continued) - -### Added - -- **`devkit_hybrid_search`** — Hybrid vector + keyword search via RRF merge (Beta) - - `search::hybrid.rs`: `rrf_merge()` (Reciprocal Rank Fusion, k=60), `keyword_search_symbols()` (SQLite LIKE on name/signature), `hybrid_search_symbols()` (auto-fallback to keyword when embedding missing) - - `registry::knowledge::hybrid_search_symbols()` wrapper - - Recommended default search tool for code concept discovery -- **`devkit_cross_repo_search`** — Cross-repository symbol search filtered by tags (Beta) - - `registry::knowledge::cross_repo_search_symbols()`: INTERSECT-based tag filtering (AND semantics), per-repo hybrid search, global dedup+sort - - Searches all repos matching ALL specified tags -- **`devkit_knowledge_report`** — Workspace knowledge coverage report (Beta) - - `src/oplog_analytics.rs`: `generate_report()` with table-existence guards for resilient querying - - Reports: repo_count, total_symbols, total_embeddings, total_calls, 
coverage_pct, per-repo breakdown, health_summary, recent_activity -- **`devkit_related_symbols`** — Explicit symbol-to-symbol knowledge links (Experimental) - - Schema v13: `code_symbol_links` table (source_repo, source_symbol, target_repo, target_symbol, link_type, strength) - - `src/symbol_links.rs`: `compute_similar_signature_links()` (Jaccard token overlap), `compute_co_located_links()` (same-file clustering) - - `generate_and_save_links()`: persists links with ON CONFLICT IGNORE upsert -- **External Embedding Provider** — Reference Python implementation in `examples/embedding-provider/` - - `index.py`: Ollama `/api/embeddings` client, batch generation, cross-platform registry DB path - - Byte-compatible f32 little-endian serialization via `struct.pack` - - CLI: `--repo-id`, `--model`, `--ollama-url`, `--batch-size`, `--force` -- **Schema v13** — `code_symbol_links` table for explicit conceptual relationships - -### Engineering - -- **Context Safety Mechanism** — Formalized as long-term architecture principle - - Sub-agent execution: serial + commit-isolated work directories (prevents compilation races) - - MCP tool idempotency: all state-mutating tools use ON CONFLICT UPDATE / transaction boundaries - - OpLog as immutable audit trail for all state transitions - ---- - -## [0.2.3] - 2026-04-20 - -### Added - -- **Semantic Vector Search (Wave 1)** — Cosine-similarity code symbol search - - `code_embeddings` table (Schema v11): `repo_id + symbol_name` PK, BLOB embedding, `generated_at` - - `embedding.rs`: Ollama/OpenAI-compatible generation + `cosine_similarity` + byte serialization - - `devkit_semantic_search` MCP tool (Beta): natural-language → embedding → top-K symbols -- **Multi-Language Symbol Extraction (Wave 2)** — tree-sitter AST parsing beyond Rust - - `tree-sitter-python`, `tree-sitter-typescript`, `tree-sitter-go` dependencies - - `SymbolType` expanded: Function, Struct, Enum, Trait, Impl, Module, Class, Interface, TypeAlias, Constant, Static - - 
Per-language call-target resolvers for Call Graph construction - - Languages supported: Rust, Python, JavaScript, TypeScript, Go -- **Call Graph Analysis** — Intra-repo function call relationship extraction - - `code_call_graph` table (Schema v10): caller → callee edges with line numbers - - `devkit_call_graph` MCP tool: "Who calls `register_tool`?" -- **Cross-Repo Dependency Graph expansion** - - `CMakeLists.txt` parsing: `find_package`, `add_subdirectory`, `FetchContent_Declare`, `target_link_libraries` - - `ManifestKind::CMake` added to dependency graph builder -- **Dead Code Detection** — `devkit_dead_code` MCP tool (Experimental) - - SQL `NOT EXISTS` query over call graph to find functions with zero incoming edges - - `LIKE 'pub%fn%'` heuristic to exclude non-public functions -- **arXiv Integration** — Pure string-parsing Atom XML fetcher (zero heavy XML deps) - - `arxiv.rs`: `PaperMetadata` with title/authors/summary/published/category - - `devkit_arxiv_fetch` MCP tool (Beta): fetch by arXiv ID -- **Performance Benchmarks** — Criterion suite (`benches/semantic_index.rs`) - - `index_repo_full` (small/medium/full parameterization) - - `cosine_similarity` (128/512/768 dims) - - `extract_symbols` (Rust/Python/Go comparison) - - `parse_cmake_lists` (CMake parsing) -- **Structured OpLog (Schema v12)** — Typed event system - - `OplogEventType` enum replacing free-text `operation` field - - JSON metadata + `duration_ms` for observability - - Migration: `CASE` mapping from legacy strings to enum variants - -### Fixed - -- **`scan` async panic** — `fetch_github_stars` now runs in `std::thread::spawn` isolation - - Prevents `reqwest::blocking::Client` drop inside tokio runtime from causing panic - - `block_on_async()` helper detects runtime context and uses `mpsc` or temporary runtime -- **Dead code false positives** — `pub fn` → `pub%fn%` SQL LIKE match covers `pub async fn` / `pub(crate) fn` / `pub unsafe fn` - - Excludes `main()` from dead code results -- **Clippy 
warnings** — 12+ lints resolved (`manual_strip`, `collapsible_if`, `FromStr`, `type_complexity`, `useless_format`, etc.) - -### Changed - -- **`nl_filter_repos`** — Now uses Tantivy full-text search as primary path - - Falls back to structured SQL filtering when Tantivy is unavailable - ---- - -## [0.2.2] - 2026-04-21 - -### Added - -- **Vault Backlinks** — Find notes that link to a given note - - `vault::backlinks:` query prefix - - TUI detail panel shows "被引用" section with backlink count and list - - MCP tool `devkit_vault_backlinks` — AI can discover note relationships - - `vault/backlinks.rs` with `build_backlink_index()` and `get_backlinks()` - -### Changed - -- **Schema v8** — `vault_notes` table no longer has `content` column - - Migration: auto-creates `vault_notes_v2`, migrates data, drops old table - - `save_vault_note` / `list_vault_notes` SQL updated to 8 columns - - Filesystem-first architecture now complete at the database level - -## [0.2.1] - 2026-04-20 - -### Added - -- **Vault Watch** — Filesystem watcher for `workspace/vault/` - - Auto-refresh TUI vault list when notes are edited externally - - 500ms debounce to avoid excessive reloads -- **Vault Tantivy Search** — `vault:` queries now use Tantivy full-text index - - Replaces slow SQLite LIKE + per-file reading - - Supports keyword scoring and ranking -- **MCP Registry Manifest** — `server.json` for official MCP Registry submission - -### Changed - -- `query.rs` vault branch: uses `search_vault()` instead of in-memory filtering - -## [0.2.0] - 2026-04-20 - -### Added - -- **Vault System** — Markdown note management with Obsidian-compatible PARA structure - - `vault/` directory with PARA folders: 00-Inbox, 01-Projects, 02-Areas, 03-Resources, 04-Archives, 99-Meta - - Filesystem-first architecture: note content lives in `.md` files, SQLite only indexes metadata - - YAML frontmatter parsing (title, tags, aliases, date) - - WikiLink `[[...]]` extraction and backlink index building -- **TUI Vault 
View** — Press `Tab` to switch between Repo list and Vault note list - - Vault list shows note titles with tag indicators - - Detail panel previews note content (first 20 lines), tags, and outgoing links - - `Enter` opens selected note in VS Code -- **MCP Vault Tools** — 3 new tools for AI Agent vault interaction - - `devkit_vault_search` — full-text search across vault notes - - `devkit_vault_read` — read note content and frontmatter by path - - `devkit_vault_write` — write or append to vault notes -- **P2-lite: repos.toml** — Optional static configuration override for repositories - - Declare tags, tier, and workspace_type in `workspace/repos.toml` - - Overrides are applied on top of auto-discovered repo metadata -- **Unified Node Model** — `core::node::{Node, NodeType, Edge}` abstraction - - `NodeType::GitRepo | VaultNote | Asset | ExternalLink` - - Foundation for future Knowledge Graph unification -- **Workspace Directory** — `%LOCALAPPDATA%/devbase/workspace/` with `vault/` and `assets/` -- **MCP Client Config** — `mcp.json` for Claude Desktop / Cursor integration - -### Changed - -- **Architecture principle**: File system = source of truth; SQLite/Tantivy = derived index/cache -- Vault notes no longer store `content` in SQLite (read from disk on demand) - -## [0.1.0] - 2026-04-20 - -### Added - -- **TUI Dashboard** — Terminal UI for multi-repository workspace management - - Repository list with status icons, stars, and tag indicators - - Detail panel with Overview / Health / Insights tabs - - Stars Trend sparkline (30-day history) - - Help Overlay with categorized keyboard shortcuts - - Responsive layout: compact / standard / wide screen modes - - Cross-repository code search (ripgrep + Tantivy dual mode) - - One-key launch into gitui / lazygit -- **MCP Server** — 14 tools for AI Agent integration (stdio transport) - - `devkit_scan`, `devkit_health`, `devkit_sync`, `devkit_query_repos` - - `devkit_code_metrics`, `devkit_module_graph`, 
`devkit_natural_language_query` - - `devkit_index`, `devkit_query`, `devkit_note`, `devkit_digest` - - `devkit_github_info`, `devkit_paper_index`, `devkit_experiment_log` -- **Safe Sync Engine** — Four-tier sync policies: Mirror / Conservative / Rebase / Merge - - Pre-sync safety assessment (dirty, diverged, detached HEAD detection) - - Dry-run preview with per-repo recommendations - - Async batch sync with concurrency control and timeout -- **Registry & Indexing** — SQLite-backed workspace registry - - Automatic Git + non-Git workspace discovery - - Schema migrations with automatic backup snapshots - - GitHub Stars cache with TTL and historical tracking - - Tantivy full-text index for repository knowledge search -- **Health Monitoring** — Workspace-wide health checks - - Git status tracking (dirty / ahead / behind / diverged) - - Blake3 hash snapshots for non-Git workspaces - - Environment tool version detection -- **i18n** — Chinese and English bilingual support -- **CI/CD** — GitHub Actions workflow for check, test, fmt, clippy on Windows - -### Engineering - -- Modular architecture: 22 crates modules with clear separation of concerns -- Dual lib+bin mode: `lib.rs` exports all modules for programmatic use -- Theme system with semantic color tokens (dark/light ready) -- Render layer split from monolithic 1026-line file into 6 focused submodules - -### Security - -- `cargo audit` clean (0 vulnerabilities in direct dependencies) - -[0.20.1]: https://github.com/juice094/devbase/releases/tag/v0.20.1 -[0.20.0]: https://github.com/juice094/devbase/releases/tag/v0.20.0 -[0.19.0]: https://github.com/juice094/devbase/releases/tag/v0.19.0 -[0.18.0]: https://github.com/juice094/devbase/releases/tag/v0.18.0 -[0.1.0]: https://github.com/juice094/devbase/releases/tag/v0.1.0 +# Changelog + +All notable changes to this project will be documented in this file. 
+ +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added + +- **FTS5 技能全文搜索** (Schema v35) — `skills_fts` 虚拟表 + 触发器,`search_skills_text()` 使用 BM25 排序(name=1.0, desc=0.8, tags=0.4, category=0.2),LIKE 降级 fallback +- **可插拔外部技能源** (Schema v36) — `SkillSource` trait + `GitHubSource` / `LocalFileSource`,`sync_sources` / `sync_log` 审计表,`devkit_skill_sync` MCP 工具(Beta tier),`devbase skill import` CLI 子命令 +- **Vault 多根目录 + symlink 跟随** — `VaultConfig`(roots + follow_symlinks),多根目录扫描 `scan_vault_with_options`,`resolve_vault_write_path` 实体回溯路径解析,`devbase vault sync` CLI +- **Ontology 导入** — `devkit_ontology_import` MCP 工具(Beta tier),`devbase ontology` CLI(`--dry-run` 预览),支持 OpenClaw workspace `ontology/entities/*.json` + `ontology/relations/*.jsonl` 批量导入 +- MCP 工具数: 69 → **71**(5 stable / 58 beta / 8 experimental) +- `devkit_document_convert` — Experimental tier MCP tool,PDF/PPTX → Markdown 转换(`pdftotext` / `python-pptx` 流水线),含 frontmatter 质量标注 +- Stable 工具 invocation 测试补全:`devkit_query_repos`、`devkit_vault_search`、`devkit_vault_read`、`devkit_status`、`devkit_workflow_list`、`devkit_index` +- `seed_repo()` 轻量测试 helper(仅插入 `entities` 表,无副作用) + +### Fixed + +- `mcp/tools/document_convert.rs` 原始字符串定界符修复(`r###"` 避免与 Python f-string `"##` 冲突) +- `cleanup_extracted_text` 单元测试期望值与实现语义对齐(保留最多 2 个连续空行) + +### Changed + +- **Workspace crate 架构重组** — 消除机械提取造成的微 crate 碎片 + - 合并 8 个 `devbase-registry-*` 微 crate(100–300 行/个)为统一 `devbase-registry`,含 8 个语义子模块(`entity`, `health`, `metrics`, `relation`, `call_graph`, `code_symbols`, `dead_code`, `workspace`) + - 拆分 10+ 个 monolithic `lib.rs` 为域驱动子模块:`devbase-embedding` (`candle`/`ollama`), `devbase-workflow-model` (`definition`/`execution`/`step_type`), `devbase-symbol-links` (`similarity`/`co_located`), `devbase-sync-protocol` (`index`/`version_vector`), `devbase-skill-runtime-types` 
(`skill_type`/`execution`/`params`), `devbase-skill-runtime-parser` (`frontmatter`/`field_parsers`), `devbase-workflow-interpolate` (`resolver`), `devbase-vault-frontmatter` (`parser`), `devbase-vault-wikilink` (`parser`), `devbase-core-types` (`node_type`/`node`/`edge`) + - 全 workspace `Cargo.toml` 统一使用 `[workspace.package]` 继承(`version`, `edition`, `authors`, `license`, `repository`) +- `KNOWN_ISSUES.md` 更新:document_convert 从 P3 债务移至已解决归档;测试计数 485→494 +- `docs/reference/mcp-tools.md` 修正为 69 个工具,补充 Index / Workflow / Relation / KnownLimit / Session 分类 +- `docs/reference/stable-tools/README.md` 修正为 5 个 Stable 工具(删除过时的 `project_brief.md` / `hybrid_search.md` / `session_recall.md`) + +## [0.20.1] - 2026-05-17 + +### Added + +- **Phase 1 Production Hardening** + - Workflow E2E 测试 — `src/mcp/tools/workflow.rs`:DAG 成功执行、失败传播验证 + - RF-7 路径隐私脱敏 — `sanitize_path()` 自动掩码 home 目录为 `~` + - Tantivy 一致性修复 — `repair_tantivy_consistency_at()` 启动时自动检测 orphan/missing 文档 + - 性能回归基线 — `test_keyword_search_latency_regression_1k` / `_10k`(profile-aware 阈值) + - `TempStorageBackend` — 测试隔离后端,消除 `DEVBASE_DATA_DIR` 竞态 +- **Architecture Invariants CI 自动化** — `scripts/invariant-checks/run-checks.ps1` + - G5 (RF-6):diff-only 检测新增生产代码 `unwrap`/`expect`/`panic`(排除 `#[cfg(test)]`) + - T11:`mcp/tools` 禁止直接调用 `rusqlite::Connection` + - T12:`tui/render` 纯消费检查(禁止写入操作) + +### Fixed + +- `AppContext::with_storage()` 使用实际 storage backend 的 `index_path()` 而非硬编码默认值 +- G5 invariant checker 正则修复:`tests.rs` 文件正确跳过 +- `Cargo.lock` 同步版本 bump(修复 `--locked` release 构建失败) +- 平台相关测试隔离:`C:\` 路径断言加 `#[cfg(windows)]`,Linux `python3` 断言适配 +- HuggingFace 网络依赖测试加 `#[ignore]`(避免 CI TLS 证书失败) + +## [0.20.0] - 2026-05-16 + +### Added + +- **知识完备性**:Vault 双向链接图遍历(BFS depth 1-3)+ `[[note#heading]]` block 引用 +- **Vault 笔记历史追踪** — Git-based blob diff,`devkit_vault_history` tool +- **混合检索质量监控** — `HybridSearchMetrics`(latency/recall/overlap/keyword_source) +- **性能回归基线** — Criterion 
benchmarks:`index_repo_full`、`cosine_similarity`、`extract_symbols` +- **客户端无关原则** — `StorageBackend` trait 完整实现,解耦 `dirs::data_local_dir()` 硬编码 +- **MCP Tools +4** (68 total) + - `devkit_vault_history`, `devkit_vault_export`, `devkit_vault_graph`, `devkit_vault_daily` + +### Changed + +- 20+ 独立 crate 零循环依赖,workspace 拆分完成 +- `entities` 表成为唯一真相源,`repos` 表彻底删除 +- Tantivy / SQLite 补偿扫描:启动时自动同步 orphan 文档 + +## [0.19.0] - 2026-05-14 + +### Added + +- **SQLite WAL 模式** — `r2d2` 连接池 + WAL journal,并发安全与增量备份 +- **Tantivy 健康评分** — `devkit_index_health`:损坏检测、自动重建、孤儿文档清理 +- **Vault 导出** — `devkit_vault_export`:Obsidian-compatible Markdown 批量导出 +- **Redis ADR 决策** — `docs/architecture/adr-003-redis.md`:评估后决定保持 SQLite 优先 +- **OpLog 审计追踪** — 结构化事件类型 `OplogEventType`,全操作不可变日志 + +### Changed + +- Schema 迁移前自动生成 `backup-YYYYMMDD-HHMMSS.db` 快照 +- 索引层反向一致性扫描与自动修复能力 + +## [0.18.0] - 2026-05-13 + +### Added + +- **ClaudeCode 工作流集成** — `docs/RFC/claudecode-workflow-integration.md` + - `devkit_project_brief` — 生成项目 Markdown 简报(架构 + 模块 + 近期提交 + 已知约束),用于 `.claude/CLAUDE.md` 注入 + - `devkit_impact_analysis` — 符号级变更影响半径分析(BFS 调用图遍历 + 相关符号发现 + 测试启发式 + 历史 oplog) + - `scripts/devbase-claude.ps1` — PowerShell 一键启动器:自动检测 repo → 生成简报 → 注入 `.claude/CLAUDE.md` → 启动 `claude` → 可选捕获退出 diff +- **Session 导入/导出工具** + - `devkit_session_export` — 导出会话为 Markdown / JSON;支持记忆类型图标与元数据 + - `devkit_session_import` — 从 bulk text 批量导入记忆(`[type]` 前缀解析) +- **MCP Tools +4** (64 total) + - `devkit_project_brief`, `devkit_impact_analysis`, `devkit_session_export`, `devkit_session_import` +- **TUI Session 视图硬化** + - 三态 MainView 切换:`RepoList → VaultList → Session`(`Tab` 键循环) + - Session 列表:状态图标(● active / ◌ archived)+ 高亮样式 + - Session 详情:记忆类型图标(◆ decision / ▪ constraint / ★ discovery / ✗ error)+ embedding model 标签 + indexed 状态 +- **AGENTS.md** 同步至 v0.18.0-dev 基线(64 Tools / 437 tests) + +### Changed + +- `src/mcp/mod.rs` Tool 注册表扩展至 64 工具(稳定 + Beta) +- `src/mcp/tests.rs` 工具计数断言同步 +- TUI `render_session.rs` / `state/mod.rs` 
适配 Schema v34 记忆字段(`embedding_model`, `indexed_at`) + +## [0.17.0] - 2026-05-13 + +### Added + +- **Agent Memory 向量存储** — Schema v34 + - `agent_memories` 新增 `embedding BLOB`, `embedding_model TEXT`, `indexed_at DATETIME` + - Partial index `idx_agent_memories_embedding` 仅索引含向量的行 + - `AgentMemory` 结构体扩展向量元数据字段 +- **SQLite UDF: `cosine_similarity`** — `src/registry/agent_context.rs` + - 输入: 两个 little-endian f32 BLOB + - 输出: REAL ∈ [-1.0, 1.0] + - 注册时机: `WorkspaceRegistry::init_db_at` 迁移完成后自动注册 +- **语义记忆搜索** — `search_memories_semantic(context_id, query_embedding, limit)` + - 纯 SQL `ORDER BY cosine_similarity(embedding, ?) DESC` + - 零 LLM 运行时依赖;仅执行向量比对 +- **MCP Tools +2** (60 total) + - `devkit_session_recall` — 外部向量查询 + 语义召回 top-k memories + - `devkit_session_index` — 为已有 memory 注入外部生成 embedding +- **Skill Runtime Auto-Recall** — `src/skill_runtime/executor.rs` + - Tier 1: Semantic recall (本地 Candle/Ollama 或外部 HTTP endpoint) + - Tier 2: Keyword fallback (`LIKE` search on `content`) + - 新环境变量: `DEVBASE_CONTEXT_MEMORY_COUNT`, `DEVBASE_CONTEXT_RECALL_METHOD` + - `DEVBASE_CONTEXT_MEMORIES` 升级为 top-k 相关 memories(含 `score` + `model`) +- **外部 Embedding Provider 集成** + - `call_external_embedding_endpoint` — `reqwest::blocking` POST `/api/embeddings` + - 配置驱动: `config.toml [embedding]` (enabled/provider/model/base_url/timeout) + - 端到端测试: mock TCP server 验证 Ollama 格式解析 + 错误码处理 +- **RFC 文档** — `docs/RFC/agent-memory-vector-storage.md` + - 架构决策: devbase = 向量数据库层,不做 embedding 生成 + - 参照 pgvector 边界设计 + +### Changed + +- **Feature Flags**: `embedding` 从 `default` 移除 + - Candle/Ollama 依赖变为 opt-in: `--features embedding` + - 默认构建零 ML 依赖,编译时间减少 30~50% +- `insert_memory` 签名扩展: 新增可选 `embedding: Option<&[f32]>` 和 `embedding_model: Option<&str>` +- `list_memories` / `search_memories` SELECT 语句扩展为 8 列(兼容新增字段) +- AGENTS.md 同步至 v0.17.0-dev 基线 + +### Breaking Changes + +- 默认构建不再包含 `devbase-embedding` crate;需要语义生成能力的用户须显式启用 `--features embedding` +- `generate_query_embedding` 在默认构建下返回错误(提示启用 
feature 或配置外部 endpoint) + +## [0.16.1] - 2026-05-13 + +### Added + +- **Workflow-Session Binding** — Schema v33 + - `workflow_executions` 新增 `context_id` 列 + 索引 + - `create_execution` 自动绑定 `resolve_active_context()` + - MCP `devkit_workflow_run` 与 CLI `workflow run` 均支持自动绑定 + - `devkit_session_workflows` tool: 列出指定 context 的 workflow 执行历史 +- `context_entity_links` 表 (Schema v32): context 与任意 entity 的多对多关联 + +## [0.16.0] - 2026-05-13 + +### Added + +- **Agent Contexts (AI Agent OS)** — Schema v31 + - `agent_contexts` 表: 持久化 AI session / project scope + - `agent_memories` 表: 结构化记忆(decision/constraint/note/discovery/error) + - 9 个 Session MCP tools: save/list/resume/attach/detach/activate/search/capture/workflows + - `resolve_active_context()`: 环境变量 `DEVBASE_ACTIVE_CONTEXT` → 文件 `.active_context` fallback + - Context-aware Skill Runtime: 注入 `DEVBASE_ACTIVE_CONTEXT` + `DEVBASE_CONTEXT_MEMORIES` + `DEVBASE_CONTEXT_LINKS` + - 所有 agent_context 操作自动写入 OpLog (`OplogEventType::AgentContext`) + +## [0.15.0] - 2026-05-04 + +### Added + +- **P1: Tantivy BM25 代码符号搜索** — `search/symbol_index.rs` + - 独立 Schema (`repo_id`, `name`, `signature`, `file_path`, `line_start`) + - `keyword_search_symbols` 主路径走 Tantivy BM25,SQLite LIKE 回退 + - 索引流程 `index.rs` 自动同步写入 symbol_index + - `StorageBackend` 扩展 `symbol_index_path()`(6 实现) +- **P3: Embedding 多后端** — Candle (默认) + Ollama (配置切换) + - 新增 `OllamaProvider` (`ureq` HTTP `/api/embed`) + - `create_provider(backend, model, base_url, timeout)` 配置化创建 + - `generate_query_embedding` 通过 `OnceLock` 懒加载配置化 provider + - 默认模型改为 `all-minilm` (384-dim,与 Candle 维度兼容) +- **P4: Health 环境检测扩展** — `EnvVersionCache` 从 5 工具 → 9 工具 + - 新增: `python`, `bun`, `zig`, `java` + - `get_tool_version` 支持 stderr fallback (Java 输出到 stderr) + - `fmt_version` 改进: Java 引号提取、Docker/Python 格式处理 +- **P5: 架构不变量自动化 CI** — `scripts/invariant-checks/run-checks.ps1` + - G5: diff-only 检测新增生产代码 unwrap/expect/panic(排除 `#[cfg(test)]`) + - T11: 检测 `mcp/tools/*` 直接调用 `rusqlite::Connection` 
+ - T12: 检测 `tui/render/*` 写入操作 + - CI job `invariant-check` 加入 `.github/workflows/ci.yml` +- **P2 Phase 1: AppContext 职责拆分** — 6 个 Client trait impl 迁出 `storage.rs` + - `scan.rs` / `health.rs` / `sync.rs` / `digest.rs` / `knowledge_engine/mod.rs` / `registry.rs` + - `storage.rs` 860 → 430 行 (-50%) + - 删除冗余 `conn_mut()` +- **P2 Phase 2: 内联 SQL 下沉** — 新增 `registry/code_symbols.rs` + `registry/dead_code.rs` + - `CodeSymbolRow` / `DeadCodeRow` + 纯函数查询 (12 个单元测试) + - `RegistryClient` 退化为纯代理层 + +### Changed + +- `EmbeddingConfig` 默认模型 `nomic-embed-text` → `all-minilm` (384-dim) +- AGENTS.md 阶段描述更新: v0.14.3 → v0.15.0 推进中 → v0.15.0 全部完成 + +### Fixed + +- **TTL 缓存负值 bug** (`97172ec`): `elapsed < ttl_seconds` → `elapsed >= 0 && elapsed < ttl_seconds` + - 防止系统时间回溯导致缓存永不过期 +- `crates/devbase-embedding/src/lib.rs` 遗留 unwrap 清零 (`encode_with_candle` → `ok_or`) + +## [0.14.3] - 2026-05-05 + +### Added + +- **Schema v30** — `code_symbols.attributes` 列,tree-sitter 提取 `#[test]`/`#[tokio::test]` 等属性 + - `devkit_dead_code` 自动过滤测试函数,消除假阳性 + - `rust_node_to_symbol` 支持 `prev_sibling()` 回溯收集属性节点 +- **Tantivy/SQLite 补偿扫描** — 启动时自动检测并清理 orphan 文档 + - 新增 `search::sync_index_to_db(conn)`,对比 Tantivy `list_indexed_repo_ids()` 与 SQLite `entities` + - `AppContext` 初始化后自动调用,失败仅 warn 不阻塞启动 +- **Feature flags** — `mcp` + `embedding`,支持 `--no-default-features` 最小化编译 + - `default = ["tui", "mcp", "embedding"]` + - `devbase-embedding` 设为 `optional = true` + - 新增 `src/clients.rs` 提取 MCP client traits,避免 mcp feature 关闭时 trait 不可用 +- **Kimi CLI MCP 集成文档** — AGENTS.md 新增 Kimi CLI 集成状态,项目级 skill 位于 `.kimi/skills/devbase-project/` + +### Changed + +- **RF-1 架构红线** — `init_db()` 全局路径残留清零 + - `init_db()` 标记 `#[deprecated]`,新增 `init_db_with(backend: &dyn StorageBackend)` + - `workflow/executor.rs`、`workflow/state.rs`、`storage.rs` 全部改为注入式 + - `examples/` + `benches/` 中额外 5 处残留同步修复 +- `index_repo_full` 合并用户 `scan.exclude_patterns` 与默认排除模式 +- `cargo fmt` + `cargo clippy --fix` 全量格式化(8 文件,6 处 warning 修复) +- 
`CONTRIBUTING.md` 新增 sccache 构建加速指南 + +### Fixed + +- `cargo clippy --all-targets -D warnings` — 7 warnings → 0 +- `cargo fmt --check` — 全量通过 + +## [0.14.2] - 2026-05-02 + +### Changed + +- health dirty 检测修复(排除 ignored 文件) +- scan 路径规范化 + syncthing-rust 识别修复 +- experiment_log / CodeMetrics / ModuleGraph / CallGraph / DeadCode 提升为 Beta tier +- 48 tools: Stable 5 / Beta 40 / Experimental 3 + +## [0.14.1] - 2026-05-01 + +### Added + +- CLI JSON 输出补全 (`--json` / `--recalc`) +- relations MCP 工具加固 +- License headers 全量补录 +- Vault Daily / Vault Graph MCP tools + +## [0.14.0] - 2026-04-28 + +### Added + +- Workspace 拆分:6 个零耦合 crate 提取 +- MCP trait 化:`mcp/tools/repo.rs` `crate::` 引用 68→41 + +## [0.13.0] - 2026-04-26 + +### Added + +- Registry God Object 拆解:10 子模块提取为 free function +- `WorkspaceRegistry` 退化为纯 facade + +## [0.12.0] - 2026-04-30 + +### Added + +- **Schema v22** — drop `vault_notes`, `papers`, `workflows` orphan tables; `entities` becomes sole source of truth for all entity types +- **Managed-Gate Fail-Safe Defaults** — `devbase sync` defaults to managed repos only + - Management tags: `mirror`, `reference`, `third-party`, `collaborative`, `team`, `own-project`, `tool`, `active`, `managed` + - Untagged / non-management repos are registered but skipped by default sync + - `--filter-tags` bypasses the gate for explicit selection +- **`.devbase-ignore`** — directory-level opt-out exclusion during scan +- `scan --register` no longer auto-tags repos with `"discovered"` +- i18n hint for unmanaged repos + +### Changed + +- `inspect_repo`: remove `"discovered"` from default tags; `-main`/`-master` repos keep `zip-snapshot` + `needs-migration` +- `collect_tasks`: default mode filters by management tags +- All `list_workflows` / `list_papers` / `list_vault_notes` queries migrated to `entities` table + `json_extract` +- Generic `upsert_entity` abstraction for entity dual-write +- `ENTITY_TYPE_*` constants extracted across 10 files (~25 replacements) +- `cargo test --lib`: 
374 → 379 passed + +### Breaking Changes + +- Existing repos tagged `"discovered"` are **no longer synced by default**. + Use `devbase tag managed` (or any management tag) to opt a repo into automatic sync. + +## [0.10.0] - 2026-04-26 + +### Added + +- **L3 Risk Layer MVP** — `known_limits` 表 + Registry CRUD + MCP tools + CLI subcommand + - Schema v18: `known_limits` 表(id, category, description, source, severity, first_seen_at, last_checked_at, mitigated) + - Registry CRUD: `save`/`get`/`list`/`delete`/`resolve`/`seed_hard_vetoes` + - MCP tools: `devkit_known_limit_store` / `devkit_known_limit_list`(Beta tier) + - CLI: `devbase limit {add,list,resolve,delete,seed}` + - OpLog 集成: create/update/resolve/delete/seed 自动写入 oplog(event_type = `KnownLimit`) + - Hard Veto 种子: AGENTS.md 中的 5 条硬约束自动填充 +- **L4 元认知层 MVP** — `knowledge_meta` 表 + L3-L4 联动 + - Schema v19: `knowledge_meta` 表(id, target_level, target_id, correction_type, correction_json, confidence, created_at) + - Registry CRUD: `save`/`get`/`list`/`delete` + - CLI 联动: `devbase limit resolve --reason "..."` 自动创建 L4 meta 记录 +- **Hard Veto 运行时守卫** — Skill 执行前自动检查未解决 hard veto + - `skill_runtime::executor::run_skill` 执行前查询 `known_limits` + - 未解决 hard veto 存在时,警告注入 `stderr`,同时写入 OpLog + - 零破坏性:skill 仍执行成功,但输出中包含 `[HARD-VETO-WARNING]` + +### Changed + +- `cargo test --all-targets`: 279 → 288 passed +- MCP tool 总数: 35 → 37 + +## [0.11.3] - 2026-04-26 + +### Changed + +- **Phase 1 主从表切换 — Stage 3 完成**(`repos` 表删除) + - `save_repo` / `update_repo_*` / `run_clean` 不再写入 `repos` + - Schema v21 迁移:重建 11 个子表(去 FK)→ 删除 `repos` 表 + - `test_helpers.rs` SCHEMA_DDL 同步去 `repos` + 去 FK + - `entities` 成为真正的读写唯一数据源 + +## [0.11.2] - 2026-04-26 + +### Changed + +- **Phase 1 主从表切换 — Stage 2 完成**(读路径迁移) + - `list_repos` / `list_repos_stale_health` / `list_repos_need_index` / `list_workspaces_by_tier` 全部改为从 `entities` 读取(`json_extract`) + - `digest.rs` / `health.rs` / `daemon.rs` / `backup.rs` / `knowledge_engine.rs` / `sync/*.rs` / 
`tui/state.rs` / `mcp/tools/repo.rs` 等所有 `list_repos()` 调用方自动迁移 + - 直接 SQL 查询迁移:`dependency_graph.rs`, `registry/links.rs`, `registry/knowledge.rs`, `query.rs`, `oplog_analytics.rs`, `commands/simple.rs` + - `update_entity_metadata_field` 修复 `json_set` 字符串引号问题:原始字符串直接传递,`"null"` 时自动 `json_remove` + - `repo_tags` / `repo_remotes` 子表保留,通过 `repo_id` JOIN 读取(FK 仍指向 `repos`) + +## [0.11.1] - 2026-04-26 + +### Changed + +- **Phase 1 主从表切换 — Stage 0 完成**(entities 第一公民前置) + - Schema v20: Flat ID 命名空间迁移(`repo:devbase` → `devbase`,`skill:xxx` → `xxx`) + - `sync_repo_to_entities_by_id` 重构为 `upsert_entity_for_repo`:直接由 `RepoEntry` 写入 entities,不再读取 repos + - `update_repo_*` 改为先写 entities metadata(`json_set`),再写 repos + - `save_repo` 写入顺序反转:entities → repos → repo_tags → repo_remotes + - `run_tag` 补全 entities 双写:`sync_repo_tags_to_entity` + - `run_clean` 改为先删 entities,再删 repos(保留 CASCADE 行为) + - Skill entities 同步同理去除 `skill:` 前缀 + +## [0.11.0] - 2026-04-26 + +### Added + +- **AppContext Pool 化** — 全链路数据库连接池统一 + - `AppContext` 持 `r2d2::Pool`,替代单 `Connection` + - `scan`/`health`/`sync`/`backup`/`daemon`/`query` 等深层模块全部迁移 + - `init_db()` 调用点从 89 处降至 5 处合法保留(Pool 前 schema 引导 ×2、migrate 定义 ×1、workflow 测试辅助 ×2) + - 根治 `spawn_blocking` / `thread::spawn` 闭包无法传递裸 `Connection` 的问题 +- **MCP 测试隔离** — 全部 MCP 集成测试改用临时目录 + - `DEVBASE_DATA_DIR` 指向 `tempfile::TempDir` + `AppContext::with_defaults()` + - 多线程并发测试全部通过,无 flaky +- **Search 测试竞态自愈** — `SEARCH_TEST_LOCK` + 临时目录隔离,多线程 (`--test-threads=4`) 稳定通过 + +### Changed + +- `cargo test --all-targets`: 288 → 374 passed(+86 个新增/迁移测试) +- CI 测试并行度: `--test-threads=1` → `--test-threads=4`,回归测试耗时 ~13s → ~4s +- `rusqlite` 0.34 + `r2d2_sqlite` 0.27.0 版本锁定 + +## [0.9.0] - 2026-04-26 + +### Added + +- **Workflow Loop Step 完整执行** — 5 种 step 类型全部可执行 + - `StepType::Loop { for_each, body }`:遍历集合,执行 body 子步骤 + - 变量插值:`${loop.item}` / `${loop.index}` + - 结果聚合:stdout 按迭代索引标记,outputs 合并 + - 失败处理:单迭代失败按 body step 的 `on_error` 策略处理 +- **12 个新增单元测试** — 
model/interpolate/validator/executor 全覆盖 + +### Changed + +- `cargo test --all-targets`:267 → 279 passed + +## [0.8.0] - 2026-04-25 + +### Added + +- **Workflow 子类型执行** — Subworkflow / Parallel / Condition 全部可执行 + - `execute_subworkflow_step`:递归调用 `execute_workflow` + - `execute_parallel_step`:子步骤串行执行 + 结果聚合 + - `execute_condition_step`:字符串插值后 true/false 评估 +- **NLQ 自然语言查询结果可执行** — TUI `[:]` 搜索结果按 Enter 直接运行 skill +- **NLQ smoke test** — `run_nlp_selected_skill` 空列表/无技能/执行管道测试 +- **TUI SkillPanel 拆分** — `SkillPanelState` 提取 7 个字段,App 51→44 字段 + +### Fixed + +- 29 个生产代码 unwrap 全部清零 +- 30 个 clippy 警告清零 + +## [0.7.0] - 2026-04-20 + +### Added + +- **NLQ 自然语言查询** — TUI `[:]` 触发 embedding 语义搜索,fallback 降级文本搜索 +- **智能同步建议** — `sync/policy.rs::recommend_sync_action` 基于 safety/ahead/behind 生成建议 + +## [0.6.0] - 2026-04-18 + +### Added + +- **Mind Market 评分系统** — `skill_runtime::scoring` + - `success_rate` + `usage_count` + `rating`(0-5 分公式) + - CLI:`skill recalc-scores` / `skill top` / `skill recommend` +- **TUI Workflow 执行** — `[w]` 详情页 `r/Enter` 运行 + 结果弹窗 + +## [0.5.0] - 2026-04-17 + +### Added + +- **Workflow Engine v0.5.0** — YAML 编排多步骤自动化 + - 5 种 step 类型:skill / subworkflow / parallel / condition / loop + - 拓扑调度(Kahn 算法)+ batch 并行执行 + - 变量插值:`${inputs.x}` / `${steps.y.outputs.z}` + - 错误策略:Fail / Continue / Retry / Fallback + - Schema v17:`workflows` + `workflow_executions` 表 +- **CLI/TUI Workflow 集成** — `devbase workflow {list,show,register,run,delete}` + `[w]` 面板 + +## [0.4.0] - 2026-04-15 + +### Added + +- **Schema v16 统一实体模型** — `entity_types` + `entities` + `relations` 表,渐进双写 +- **Skill 自动封装** — `devbase skill discover ` 自动分析项目 CLI/API,生成 SKILL.md +- **Git URL Discover** — `devbase skill discover https://github.com/...` 克隆+分析+注册 +- **MCP `devkit_skill_discover`** — 35 tools 总数 + +## [0.3.0] - 2026-04-12 + +### Added + +- **34 MCP tools 全量通过 MCP Inspector** +- **README Quick Start 三步内跑通** +- **CI/CD** — `.github/workflows/ci.yml`(check / test / fmt / clippy on 
Windows) +- **GitHub Release 预编译二进制** + +## [0.2.4] - 2026-04-20 + +### Architecture + +- **Outboard Brain Embedding Architecture** — Embedding generation moved to external Skill/MCP Server + - `embedding.rs` stripped of Ollama/OpenAI generation logic; storage protocol only (`embedding_to_bytes`, `bytes_to_embedding`, `cosine_similarity`) + - `knowledge_engine.rs` no longer generates embeddings during indexing + - Aligns with "store + search in devbase, compute in Clarity/Skill" boundary + +### Changed + +- **Breaking** — `devkit_semantic_search` now accepts `query_embedding: number[]` instead of `query: string` + - Embedding generation is the caller's responsibility (external MCP Server or Skill) + - Removed `config.embedding.enabled` gate; search works as long as embeddings exist in DB + +### Added + +- **`devkit_embedding_store`** — Store externally-generated embedding vectors into SQLite + - Parameters: `repo_id`, `symbol_name`, `embedding: number[]` + - Upsert semantics (ON CONFLICT UPDATE) +- **`devkit_embedding_search`** — Alias for `devkit_semantic_search` with vector-based interface + - Same parameters and behavior, alternative name for workflow clarity +- **MCP tool count**: 25 → 31 + +## [0.2.4] - 2026-04-20 (continued) + +### Added + +- **`devkit_hybrid_search`** — Hybrid vector + keyword search via RRF merge (Beta) + - `search::hybrid.rs`: `rrf_merge()` (Reciprocal Rank Fusion, k=60), `keyword_search_symbols()` (SQLite LIKE on name/signature), `hybrid_search_symbols()` (auto-fallback to keyword when embedding missing) + - `registry::knowledge::hybrid_search_symbols()` wrapper + - Recommended default search tool for code concept discovery +- **`devkit_cross_repo_search`** — Cross-repository symbol search filtered by tags (Beta) + - `registry::knowledge::cross_repo_search_symbols()`: INTERSECT-based tag filtering (AND semantics), per-repo hybrid search, global dedup+sort + - Searches all repos matching ALL specified tags +- **`devkit_knowledge_report`** 
— Workspace knowledge coverage report (Beta) + - `src/oplog_analytics.rs`: `generate_report()` with table-existence guards for resilient querying + - Reports: repo_count, total_symbols, total_embeddings, total_calls, coverage_pct, per-repo breakdown, health_summary, recent_activity +- **`devkit_related_symbols`** — Explicit symbol-to-symbol knowledge links (Experimental) + - Schema v13: `code_symbol_links` table (source_repo, source_symbol, target_repo, target_symbol, link_type, strength) + - `src/symbol_links.rs`: `compute_similar_signature_links()` (Jaccard token overlap), `compute_co_located_links()` (same-file clustering) + - `generate_and_save_links()`: persists links with ON CONFLICT IGNORE upsert +- **External Embedding Provider** — Reference Python implementation in `examples/embedding-provider/` + - `index.py`: Ollama `/api/embeddings` client, batch generation, cross-platform registry DB path + - Byte-compatible f32 little-endian serialization via `struct.pack` + - CLI: `--repo-id`, `--model`, `--ollama-url`, `--batch-size`, `--force` +- **Schema v13** — `code_symbol_links` table for explicit conceptual relationships + +### Engineering + +- **Context Safety Mechanism** — Formalized as long-term architecture principle + - Sub-agent execution: serial + commit-isolated work directories (prevents compilation races) + - MCP tool idempotency: all state-mutating tools use ON CONFLICT UPDATE / transaction boundaries + - OpLog as immutable audit trail for all state transitions + +--- + +## [0.2.3] - 2026-04-20 + +### Added + +- **Semantic Vector Search (Wave 1)** — Cosine-similarity code symbol search + - `code_embeddings` table (Schema v11): `repo_id + symbol_name` PK, BLOB embedding, `generated_at` + - `embedding.rs`: Ollama/OpenAI-compatible generation + `cosine_similarity` + byte serialization + - `devkit_semantic_search` MCP tool (Beta): natural-language → embedding → top-K symbols +- **Multi-Language Symbol Extraction (Wave 2)** — tree-sitter AST parsing 
beyond Rust + - `tree-sitter-python`, `tree-sitter-typescript`, `tree-sitter-go` dependencies + - `SymbolType` expanded: Function, Struct, Enum, Trait, Impl, Module, Class, Interface, TypeAlias, Constant, Static + - Per-language call-target resolvers for Call Graph construction + - Languages supported: Rust, Python, JavaScript, TypeScript, Go +- **Call Graph Analysis** — Intra-repo function call relationship extraction + - `code_call_graph` table (Schema v10): caller → callee edges with line numbers + - `devkit_call_graph` MCP tool: "Who calls `register_tool`?" +- **Cross-Repo Dependency Graph expansion** + - `CMakeLists.txt` parsing: `find_package`, `add_subdirectory`, `FetchContent_Declare`, `target_link_libraries` + - `ManifestKind::CMake` added to dependency graph builder +- **Dead Code Detection** — `devkit_dead_code` MCP tool (Experimental) + - SQL `NOT EXISTS` query over call graph to find functions with zero incoming edges + - `LIKE 'pub%fn%'` heuristic to exclude non-public functions +- **arXiv Integration** — Pure string-parsing Atom XML fetcher (zero heavy XML deps) + - `arxiv.rs`: `PaperMetadata` with title/authors/summary/published/category + - `devkit_arxiv_fetch` MCP tool (Beta): fetch by arXiv ID +- **Performance Benchmarks** — Criterion suite (`benches/semantic_index.rs`) + - `index_repo_full` (small/medium/full parameterization) + - `cosine_similarity` (128/512/768 dims) + - `extract_symbols` (Rust/Python/Go comparison) + - `parse_cmake_lists` (CMake parsing) +- **Structured OpLog (Schema v12)** — Typed event system + - `OplogEventType` enum replacing free-text `operation` field + - JSON metadata + `duration_ms` for observability + - Migration: `CASE` mapping from legacy strings to enum variants + +### Fixed + +- **`scan` async panic** — `fetch_github_stars` now runs in `std::thread::spawn` isolation + - Prevents `reqwest::blocking::Client` drop inside tokio runtime from causing panic + - `block_on_async()` helper detects runtime context and uses 
`mpsc` or temporary runtime +- **Dead code false positives** — `pub fn` → `pub%fn%` SQL LIKE match covers `pub async fn` / `pub(crate) fn` / `pub unsafe fn` + - Excludes `main()` from dead code results +- **Clippy warnings** — 12+ lints resolved (`manual_strip`, `collapsible_if`, `FromStr`, `type_complexity`, `useless_format`, etc.) + +### Changed + +- **`nl_filter_repos`** — Now uses Tantivy full-text search as primary path + - Falls back to structured SQL filtering when Tantivy is unavailable + +--- + +## [0.2.2] - 2026-04-21 + +### Added + +- **Vault Backlinks** — Find notes that link to a given note + - `vault::backlinks:` query prefix + - TUI detail panel shows "被引用" section with backlink count and list + - MCP tool `devkit_vault_backlinks` — AI can discover note relationships + - `vault/backlinks.rs` with `build_backlink_index()` and `get_backlinks()` + +### Changed + +- **Schema v8** — `vault_notes` table no longer has `content` column + - Migration: auto-creates `vault_notes_v2`, migrates data, drops old table + - `save_vault_note` / `list_vault_notes` SQL updated to 8 columns + - Filesystem-first architecture now complete at the database level + +## [0.2.1] - 2026-04-20 + +### Added + +- **Vault Watch** — Filesystem watcher for `workspace/vault/` + - Auto-refresh TUI vault list when notes are edited externally + - 500ms debounce to avoid excessive reloads +- **Vault Tantivy Search** — `vault:` queries now use Tantivy full-text index + - Replaces slow SQLite LIKE + per-file reading + - Supports keyword scoring and ranking +- **MCP Registry Manifest** — `server.json` for official MCP Registry submission + +### Changed + +- `query.rs` vault branch: uses `search_vault()` instead of in-memory filtering + +## [0.2.0] - 2026-04-20 + +### Added + +- **Vault System** — Markdown note management with Obsidian-compatible PARA structure + - `vault/` directory with PARA folders: 00-Inbox, 01-Projects, 02-Areas, 03-Resources, 04-Archives, 99-Meta + - Filesystem-first 
architecture: note content lives in `.md` files, SQLite only indexes metadata + - YAML frontmatter parsing (title, tags, aliases, date) + - WikiLink `[[...]]` extraction and backlink index building +- **TUI Vault View** — Press `Tab` to switch between Repo list and Vault note list + - Vault list shows note titles with tag indicators + - Detail panel previews note content (first 20 lines), tags, and outgoing links + - `Enter` opens selected note in VS Code +- **MCP Vault Tools** — 3 new tools for AI Agent vault interaction + - `devkit_vault_search` — full-text search across vault notes + - `devkit_vault_read` — read note content and frontmatter by path + - `devkit_vault_write` — write or append to vault notes +- **P2-lite: repos.toml** — Optional static configuration override for repositories + - Declare tags, tier, and workspace_type in `workspace/repos.toml` + - Overrides are applied on top of auto-discovered repo metadata +- **Unified Node Model** — `core::node::{Node, NodeType, Edge}` abstraction + - `NodeType::GitRepo | VaultNote | Asset | ExternalLink` + - Foundation for future Knowledge Graph unification +- **Workspace Directory** — `%LOCALAPPDATA%/devbase/workspace/` with `vault/` and `assets/` +- **MCP Client Config** — `mcp.json` for Claude Desktop / Cursor integration + +### Changed + +- **Architecture principle**: File system = source of truth; SQLite/Tantivy = derived index/cache +- Vault notes no longer store `content` in SQLite (read from disk on demand) + +## [0.1.0] - 2026-04-20 + +### Added + +- **TUI Dashboard** — Terminal UI for multi-repository workspace management + - Repository list with status icons, stars, and tag indicators + - Detail panel with Overview / Health / Insights tabs + - Stars Trend sparkline (30-day history) + - Help Overlay with categorized keyboard shortcuts + - Responsive layout: compact / standard / wide screen modes + - Cross-repository code search (ripgrep + Tantivy dual mode) + - One-key launch into gitui / lazygit +- 
**MCP Server** — 14 tools for AI Agent integration (stdio transport) + - `devkit_scan`, `devkit_health`, `devkit_sync`, `devkit_query_repos` + - `devkit_code_metrics`, `devkit_module_graph`, `devkit_natural_language_query` + - `devkit_index`, `devkit_query`, `devkit_note`, `devkit_digest` + - `devkit_github_info`, `devkit_paper_index`, `devkit_experiment_log` +- **Safe Sync Engine** — Four-tier sync policies: Mirror / Conservative / Rebase / Merge + - Pre-sync safety assessment (dirty, diverged, detached HEAD detection) + - Dry-run preview with per-repo recommendations + - Async batch sync with concurrency control and timeout +- **Registry & Indexing** — SQLite-backed workspace registry + - Automatic Git + non-Git workspace discovery + - Schema migrations with automatic backup snapshots + - GitHub Stars cache with TTL and historical tracking + - Tantivy full-text index for repository knowledge search +- **Health Monitoring** — Workspace-wide health checks + - Git status tracking (dirty / ahead / behind / diverged) + - Blake3 hash snapshots for non-Git workspaces + - Environment tool version detection +- **i18n** — Chinese and English bilingual support +- **CI/CD** — GitHub Actions workflow for check, test, fmt, clippy on Windows + +### Engineering + +- Modular architecture: 22 crate modules with clear separation of concerns +- Dual lib+bin mode: `lib.rs` exports all modules for programmatic use +- Theme system with semantic color tokens (dark/light ready) +- Render layer split from monolithic 1026-line file into 6 focused submodules + +### Security + +- `cargo audit` clean (0 vulnerabilities in direct dependencies) + +[0.20.1]: https://github.com/juice094/devbase/releases/tag/v0.20.1 +[0.20.0]: https://github.com/juice094/devbase/releases/tag/v0.20.0 +[0.19.0]: https://github.com/juice094/devbase/releases/tag/v0.19.0 +[0.18.0]: https://github.com/juice094/devbase/releases/tag/v0.18.0 +[0.1.0]: https://github.com/juice094/devbase/releases/tag/v0.1.0 diff --git
a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index ccd1b92..6345939 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -6,24 +6,35 @@ We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, -nationality, personal appearance, race, religion, or sexual identity -and orientation. +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. ## Our Standards -Examples of behavior that contributes to a positive environment: +Examples of behavior that contributes to a positive environment for our +community include: - Demonstrating empathy and kindness toward other people - Being respectful of differing opinions, viewpoints, and experiences - Giving and gracefully accepting constructive feedback -- Accepting responsibility and apologizing to those affected by our mistakes +- Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +- Focusing on what is best not just for us as individuals, but for the overall + community -Examples of unacceptable behavior: +Examples of unacceptable behavior include: -- The use of sexualized language or imagery, and sexual attention or advances +- The use of sexualized language or imagery, and sexual attention or advances of + any kind - Trolling, insulting or derogatory comments, and personal or political attacks - Public or private harassment -- Publishing others' private information without explicit permission +- Publishing others' private information, such as a physical or email address, + without their explicit permission +- Other conduct which could reasonably 
be considered inappropriate in a + professional setting ## Enforcement Responsibilities @@ -32,10 +43,18 @@ acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + ## Scope This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. ## Enforcement @@ -45,10 +64,70 @@ reported to the community leaders responsible for enforcement at All complaints will be reviewed and investigated promptly and fairly. +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. 
No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], -version 2.0, available at -https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. 
[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 75e11e6..c7bdb1d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -9,7 +9,7 @@ | 指标 | 状态 | |:---|:---| | 版本 | v0.20.1 | -| 测试 | 485+ passed / 0 failed / 5 ignored | +| 测试 | 605 passed / 0 failed / 7 ignored | | Clippy | `-D warnings` 全绿 | | 生产代码 unwrap | 0 | | 许可证 | AGPL-3.0-or-later | @@ -140,17 +140,18 @@ cargo run -- mcp | **添加 MCP Tool** | `src/mcp/tools/` 新建模块 | `src/mcp/tools/mod.rs`, `src/mcp/mod.rs` | [AGENTS.md](AGENTS.md) "MCP 工具幂等性" | | **添加 Skill** | `skills/` 或外部 git 仓库 | `SKILL.md` 规范 | [AGENTS.md](AGENTS.md) "Skill 规范" | | **改进文档** | 直接编辑 `.md` 文件 | `README.md`, `AGENTS.md` | — | -| **重构 / 性能优化** | 先开 Issue 讨论 | — | [ARCHITECTURE.md](ARCHITECTURE.md) | +| **重构 / 性能优化** | 先开 Issue 讨论 | — | [`docs/architecture/invariants.md`](docs/architecture/invariants.md) | ### 添加 MCP Tool 的标准路径 1. 在 `src/mcp/tools/` 新建模块 -2. 实现 `McpTool` trait(`name()`, `description()`, `handle()`) -3. 在 `src/mcp/tools/mod.rs` 注册 -4. 在 `src/mcp/mod.rs` 的 `handle_request` 中路由 +2. 实现 `McpTool` trait(`name()`, `schema()`, `invoke()`,可选 `invoke_stream()`) +3. 在 `src/mcp/tools/mod.rs` 注册并 `pub use` +4. 在 `src/mcp/mod.rs` 的 `McpToolEnum` / 路由中加入该工具 5. **必须**添加单元测试到 `src/mcp/tests.rs` 6. 更新 `README.md` Tool 矩阵 7. 更新 `AGENTS.md` 工具计数 +8. 更新 `docs/reference/mcp-tools.md` 工具清单 > **核心原则**: 所有状态变更操作必须幂等(`ON CONFLICT ... DO UPDATE`)。 @@ -232,8 +233,9 @@ existence before registration. Returns structured validation report. 
| 文档 | 内容 | |:---|:---| -| [`ARCHITECTURE.md`](ARCHITECTURE.md) | 三层架构、技术决策记录、模块边界 | -| [`AGENTS.md`](AGENTS.md) | 安全原则、上下文机制、Schema 迁移规范、历史 Waves | +| [`docs/architecture/overview.md`](docs/architecture/overview.md) | 三层架构、技术决策记录、模块边界 | +| [`docs/architecture/invariants.md`](docs/architecture/invariants.md) | 架构红线 RF-1~RF-7 + 分层约束 | +| [`AGENTS.md`](AGENTS.md) | Agent 环境指引、安全原则、Schema 迁移规范 | | [`docs/architecture/`](docs/architecture/) | 预拆分评估、Workflow DSL 规范、统一实体模型 | | [`docs/research/`](docs/research/) | 竞品分析、Embedding 策略 | diff --git a/KNOWN_ISSUES.md b/KNOWN_ISSUES.md index dbdb3f4..c92e80a 100644 --- a/KNOWN_ISSUES.md +++ b/KNOWN_ISSUES.md @@ -13,9 +13,9 @@ ## P1 — 测试覆盖 -### 28 个 MCP 工具缺少 invocation tests +### MCP 工具 invocation 测试覆盖不均 -**现状**:68 个工具中,40 个有 dedicated `invoke()` 测试(+3 本批次新增),28 个仅有 name/schema smoke tests 或零覆盖。 +**现状**:71 个工具中,约 45 个有 dedicated `invoke()` 测试,其余以 name/schema smoke tests 或间接覆盖为主。Stable 工具已实现 invocation 测试补全。 **影响**:Beta → Stable 的 promote 需要测试背书;无测试的工具在重构时存在回归风险。 @@ -23,27 +23,23 @@ | 工具 | Tier | 已有覆盖 | |------|------|----------| -| `devkit_index` | Beta | 间接(scenario) | | `devkit_index_health` | Beta | 无 | | `devkit_index_stream` | Beta | 无 | -| `devkit_status` | Beta | 无 | | `devkit_note` | Beta | 无 | | `devkit_digest` | Experimental | 无 | | `devkit_paper_index` | Experimental | 无 | -| `devkit_semantic_search` | Beta | 间接(scenario) | | `devkit_embedding_store` | Beta | 无 | | `devkit_embedding_search` | Beta | 无 | -| `devkit_cross_repo_search` | Beta | 间接(scenario) | | `devkit_related_symbols` | Experimental | 无 | | `devkit_search_quality` | Beta | 无 | | `devkit_impact_analysis` | Beta | 无 | | `devkit_project_brief` | Beta | 间接(scenario) | | `devkit_knowledge_report` | Beta | 间接(scenario) | -| `devkit_session_*` × 13 | Beta/Exp | 部分 smoke | -| `devkit_workflow_*` × 3 | Beta | 部分(workflow.rs 单元测试) | -| `devkit_evaluate` | Beta | 无 | +| `devkit_session_*` × 13 | Beta/Exp | 部分 smoke;save/list/resume 已有覆盖 | +| `devkit_evaluate` | 
Beta | smoke | +| `devkit_ontology_import` | Beta | smoke | -**建议**:按调用频率排序,优先为 Index、Status、Workflow、Session save/list 添加测试。 +**建议**:按调用频率排序,持续为 IndexHealth、Session 记忆召回、OntologyImport、Embedding 相关工具添加 dedicated 测试。 --- diff --git a/README.md b/README.md index 1db3877..d022aec 100644 --- a/README.md +++ b/README.md @@ -1,146 +1,147 @@ -
- -# 🗄️ devbase - -> **开发者工作空间的世界模型编译器** - -一套引擎,统一代码上下文、知识记忆与智能体推理。 - -[![Version](https://img.shields.io/badge/version-v0.20.1-blue)](https://github.com/juice094/devbase/releases) -[![Tests](https://img.shields.io/badge/tests-494%2B%20passed-brightgreen)](https://github.com/juice094/devbase/actions) -[![Clippy](https://img.shields.io/badge/clippy-0%20warnings-green)](https://github.com/juice094/devbase/actions) -[![License](https://img.shields.io/badge/license-AGPL--3.0%20%2F%20Commercial-orange)](LICENSE) -[![Rust](https://img.shields.io/badge/rust-1.95%2B-9cf)](https://www.rust-lang.org) -[![Glama](https://glama.ai/mcp/servers/juice094/devbase/badges/score.svg)](https://glama.ai/mcp/servers/juice094/devbase) - -
- ---- - -## 📋 简介 - -devbase 将代码库、笔记与工作流编译为 AI 可推理的结构化情境 — 不是存储数据,是构建环境的心智模型。 - -| 你是谁 | devbase 为你做什么 | -|:---|:---| -| **人类开发者** | `devbase tui` — 终端仪表盘,一眼看清 N 个仓库的 Git 状态,按 `s` 批量安全同步 | -| **AI 智能体** | 71 个 MCP 工具:通过 `devkit_skill_run` 发现、执行、编排 Skill — 不再重复造轮子 | -| **项目维护者** | `devbase skill discover .` — 一键将项目封装为 Skill,让 AI 用户能够发现和调用 | - ---- - -## 🌟 核心亮点 - -| 亮点 | 说明 | -|:---|:---| -| 📊 **TUI 仪表盘** | ratatui 终端界面:跨仓库搜索、安全同步、Skill/Workflow 发现 | -| 🔌 **71 个 MCP 工具** | stdio 本地进程通信:仓库管理、代码分析、知识图谱、智能体记忆 | -| 🏠 **本地优先** | 零数据离开本机 — SQLite + Tantivy + tree-sitter,无需云端 | -| 🔍 **混合检索** | BM25 全文 + FTS5 技能搜索 + 纯 SQL 向量搜索(`cosine_similarity` UDF),零 ML 运行时依赖 | - -> [完整 71 个 Tool 矩阵 → docs/guides/mcp-integration.md](docs/guides/mcp-integration.md) - ---- - -## 🔧 技术栈 - -| 组件 | 技术 | -|:---|:---| -| 终端 UI | ratatui | -| 全文检索 | Tantivy (BM25) | -| 语义检索 | SQLite BLOB + `cosine_similarity` UDF | -| 代码解析 | tree-sitter (Rust/Python/TS/Go) | -| 关系存储 | SQLite (WAL 模式, OpLog 审计) | -| 协议 | Model Context Protocol (stdio) | - ---- - -## 📁 项目结构 - -``` -devbase/ -├── src/ -│ ├── main.rs # CLI 入口:命令解析与分发 -│ ├── tui/ # 终端仪表盘(ratatui) -│ │ # 多仓库导航、跨仓库搜索、安全同步预览 -│ ├── mcp/ # MCP Server(71 个工具,stdio 通信) -│ │ # 人类与 AI 的统一接口层 -│ ├── registry/ # 仓库注册表:Git 状态、健康检查、批量同步 -│ ├── index/ # Tantivy 全文索引 + SQLite 向量索引 -│ │ # 混合检索核心,BM25 + cosine 向量评分 -│ ├── vault/ # PARA 笔记系统:双向链接、BFS 图遍历 -│ ├── skill/ # Skill 生命周期:发现 → 安装 → 执行 → 评分 → 发布 -│ │ # 自动封装项目为 AI 可调用的 Skill -│ ├── workflow/ # YAML 编排引擎:5 种 step 类型,拓扑调度 + 并行执行 -│ └── session/ # 智能体会话生命周期 + 向量记忆持久化 -├── docs/ -│ ├── architecture/ # 架构文档总览 -│ └── guides/ # 集成指南(Claude Code / 5ire / Kimi CLI) -├── scripts/ -│ ├── install.ps1 # Windows 一键安装 -│ ├── install.sh # Linux/macOS 一键安装 -│ └── devbase-claude.ps1 # Claude Code 一键启动器 -└── README.md -``` - -### 核心设计 - -**三层架构**: -1. **交互层** — TUI 仪表盘 + MCP Server + Workflow 引擎(人类与 AI 的接口) -2. **编译层** — 感知(tree-sitter/Tantivy/Git)→ 知识(图谱/向量/关系)→ 策略(同步/工作流/健康守卫) -3. 
**可靠层** — SQLite WAL 并发安全 + 索引健康检测 + OpLog 全操作审计 - -> 可靠性红线:所有 Registry 写入必须留下不可变审计痕迹(OpLog);Schema 迁移前自动生成快照。详见 [docs/architecture/overview.md](docs/architecture/overview.md)。 - ---- - -## 🚀 快速开始 - -```powershell -# Windows 一行安装 -irm https://raw.githubusercontent.com/juice094/devbase/main/scripts/install.ps1 | iex - -# 或下载预编译二进制(~8.7 MB) -# https://github.com/juice094/devbase/releases/tag/v0.20.1 -``` - -```bash -# Linux / macOS -curl -fsSL https://raw.githubusercontent.com/juice094/devbase/main/scripts/install.sh | bash - -# 基础工作流 -devbase scan . --register # 1. 扫描并注册工作区 -devbase tui # 2. 打开仪表盘 -devbase mcp # 3. 启动 MCP 服务端(供 AI 调用) -``` - -**AI 助手配置** — 添加到 `claude_desktop_config.json` 或 `~/.kimi/mcp.json`: -```json -{ "mcpServers": { "devbase": { "command": "devbase", "args": ["mcp"] } } } -``` - ---- - -## 🤝 参与贡献 - -详见 [CONTRIBUTING.md](CONTRIBUTING.md) — 添加 MCP 工具、Skill Schema、构建模式说明。快速验证: - -```bash -cargo build --release -cargo test --all-targets -cargo clippy --all-targets -D warnings -``` - ---- - -## 📄 许可证 - -双许可证:[AGPL-3.0+](LICENSE) 开源 / [商业授权](LICENSE-COMMERCIAL.md) 闭源使用。联系:`juice094@protonmail.com`。 - ---- - -
- -**[⭐ Star](https://github.com/juice094/devbase) · [🐛 Issues](https://github.com/juice094/devbase/issues) · [🤝 Contribute](CONTRIBUTING.md)** - -
+
+ +# 🗄️ devbase + +> **开发者工作空间的世界模型编译器** + +一套引擎,统一代码上下文、知识记忆与智能体推理。 + +[![Version](https://img.shields.io/badge/version-v0.20.1-blue)](https://github.com/juice094/devbase/releases) +[![Tests](https://img.shields.io/badge/tests-605%2B%20passed-brightgreen)](https://github.com/juice094/devbase/actions) +[![Clippy](https://img.shields.io/badge/clippy-0%20warnings-green)](https://github.com/juice094/devbase/actions) +[![License](https://img.shields.io/badge/license-AGPL--3.0%20%2F%20Commercial-orange)](LICENSE) +[![Rust](https://img.shields.io/badge/rust-1.95%2B-9cf)](https://www.rust-lang.org) +[![Glama](https://glama.ai/mcp/servers/juice094/devbase/badges/score.svg)](https://glama.ai/mcp/servers/juice094/devbase) + +
+ +--- + +## 📋 简介 + +devbase 将代码库、笔记与工作流编译为 AI 可推理的结构化情境 — 不是存储数据,是构建环境的心智模型。 + +| 你是谁 | devbase 为你做什么 | +|:---|:---| +| **人类开发者** | `devbase tui` — 终端仪表盘,一眼看清 N 个仓库的 Git 状态,按 `s` 批量安全同步 | +| **AI 智能体** | 71 个 MCP 工具:通过 `devkit_skill_run` 发现、执行、编排 Skill — 不再重复造轮子 | +| **项目维护者** | `devbase skill discover .` — 一键将项目封装为 Skill,让 AI 用户能够发现和调用 | + +--- + +## 🌟 核心亮点 + +| 亮点 | 说明 | +|:---|:---| +| 📊 **TUI 仪表盘** | ratatui 终端界面:跨仓库搜索、安全同步、Skill/Workflow 发现 | +| 🔌 **71 个 MCP 工具** | stdio 本地进程通信:仓库管理、代码分析、知识图谱、智能体记忆 | +| 🏠 **本地优先** | 零数据离开本机 — SQLite + Tantivy + tree-sitter,无需云端 | +| 🔍 **混合检索** | BM25 全文 + FTS5 技能搜索 + 纯 SQL 向量搜索(`cosine_similarity` UDF),零 ML 运行时依赖 | + +> [完整 71 个 Tool 矩阵 → docs/reference/mcp-tools.md](docs/reference/mcp-tools.md) + +--- + +## 🔧 技术栈 + +| 组件 | 技术 | +|:---|:---| +| 终端 UI | ratatui | +| 全文检索 | Tantivy (BM25) | +| 语义检索 | SQLite BLOB + `cosine_similarity` UDF | +| 代码解析 | tree-sitter (Rust/Python/TS/Go) | +| 关系存储 | SQLite (WAL 模式, OpLog 审计) | +| 协议 | Model Context Protocol (stdio) | + +--- + +## 📁 项目结构 + +``` +devbase/ +├── src/ +│ ├── main.rs # CLI 入口:命令解析与分发(RF-4 ≤ 1000 行) +│ ├── lib.rs # 导出 30+ 模块 +│ ├── commands/ # CLI 子命令实现 +│ ├── tui/ # 终端仪表盘(ratatui) +│ ├── mcp/ # MCP Server(71 个工具,stdio 通信) +│ ├── registry/ # SQLite Registry:schema、迁移、实体、关系 +│ ├── search/ # Tantivy BM25 + 向量混合检索 +│ ├── vault/ # PARA 笔记系统:双向链接、BFS 图遍历 +│ ├── skill_runtime/ # Skill 生命周期:发现 → 安装 → 执行 → 评分 → 发布 +│ ├── workflow/ # YAML 编排引擎:5 种 step 类型 +│ ├── knowledge_engine/# 代码符号提取与语义索引 +│ └── sync/ # 仓库同步编排与策略 +├── crates/ # 12 个独立 workspace crate +│ ├── devbase-core-types +│ ├── devbase-registry +│ ├── devbase-embedding +│ ├── devbase-vault-wikilink +│ └── ... +├── docs/ # 完整文档导航:docs/README.md +├── scripts/ # 安装脚本与 CI 辅助 +├── skills/ # 示例 Skill +└── README.md +``` + +### 核心设计 + +**三层架构**: +1. **交互层** — TUI 仪表盘 + MCP Server + Workflow 引擎(人类与 AI 的接口) +2. **编译层** — 感知(tree-sitter/Tantivy/Git)→ 知识(图谱/向量/关系)→ 策略(同步/工作流/健康守卫) +3. 
**可靠层** — SQLite WAL 并发安全 + 索引健康检测 + OpLog 全操作审计 + +> 可靠性红线:所有 Registry 写入必须留下不可变审计痕迹(OpLog);Schema 迁移前自动生成快照。详见 [docs/architecture/overview.md](docs/architecture/overview.md)。 + +--- + +## 🚀 快速开始 + +```powershell +# Windows 一行安装 +irm https://raw.githubusercontent.com/juice094/devbase/main/scripts/install.ps1 | iex + +# 或下载预编译二进制(~8.7 MB) +# https://github.com/juice094/devbase/releases/tag/v0.20.1 +``` + +```bash +# Linux / macOS +curl -fsSL https://raw.githubusercontent.com/juice094/devbase/main/scripts/install.sh | bash + +# 基础工作流 +devbase scan . --register # 1. 扫描并注册工作区 +devbase tui # 2. 打开仪表盘 +devbase mcp # 3. 启动 MCP 服务端(供 AI 调用) +``` + +**AI 助手配置** — 添加到 `claude_desktop_config.json` 或 `~/.kimi/mcp.json`: +```json +{ "mcpServers": { "devbase": { "command": "devbase", "args": ["mcp"] } } } +``` + +--- + +## 🤝 参与贡献 + +详见 [CONTRIBUTING.md](CONTRIBUTING.md) — 添加 MCP 工具、Skill Schema、构建模式说明。快速验证: + +```bash +cargo build --release +cargo test --all-targets +cargo clippy --all-targets -- -D warnings +``` + +--- + +## 📄 许可证 + +双许可证:[AGPL-3.0+](LICENSE) 开源 / [商业授权](LICENSE-COMMERCIAL.md) 闭源使用。联系:`juice094@protonmail.com`。 + +--- + +
+ +**[⭐ Star](https://github.com/juice094/devbase) · [🐛 Issues](https://github.com/juice094/devbase/issues) · [🤝 Contribute](CONTRIBUTING.md)** + +
diff --git a/SECURITY.md b/SECURITY.md index 113506a..77453dd 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -6,8 +6,8 @@ The following versions of devbase currently receive security updates: | Version | Supported | | ------- | ------------------ | -| 0.15.x | :white_check_mark: | -| < 0.15 | :x: | +| 0.20.x | :white_check_mark: | +| < 0.20 | :x: | ## Reporting a Vulnerability diff --git a/SUPPORT.md b/SUPPORT.md index 159f358..0926d7a 100644 --- a/SUPPORT.md +++ b/SUPPORT.md @@ -3,9 +3,12 @@ ## Documentation - **User Guide**: See [`README.md`](./README.md) for installation, quick start, and feature overview -- **Architecture**: See [`ARCHITECTURE.md`](./ARCHITECTURE.md) for technical design and module boundaries +- **Documentation Index**: See [`docs/README.md`](./docs/README.md) for the full documentation map +- **Architecture**: See [`docs/architecture/overview.md`](./docs/architecture/overview.md) for technical design and module boundaries +- **Architecture Guardrails**: See [`docs/architecture/invariants.md`](./docs/architecture/invariants.md) for RF rules and tiered checks - **Agent Guidelines**: See [`AGENTS.md`](./AGENTS.md) for MCP tool conventions and schema migration rules - **Contributing**: See [`CONTRIBUTING.md`](./CONTRIBUTING.md) for build instructions and PR checklist +- **Changelog**: See [`CHANGELOG.md`](./CHANGELOG.md) for version history ## Bug Reports & Feature Requests diff --git a/docs/AGENTS-full.md b/docs/AGENTS-full.md deleted file mode 100644 index de2ea85..0000000 --- a/docs/AGENTS-full.md +++ /dev/null @@ -1,705 +0,0 @@ -# Agent 环境指引 - -`devbase` 是 **本地情境编译器(Local Context Compiler)** —— AI agent 在本地数字世界中的海马体。 - -> 它将本地数字资产的原始数据(代码库、笔记、Skill、工作流)编译为 AI 可决策的结构化情境,不负责思考,不负责执行,只负责感知、编码、持久化、检索。 - -- **当前阶段**:阶段十一 — v0.20.0 已发布(知识完备性) -- **当前版本**:v0.20.0(Schema 34,68 MCP tools,451 tests) -- **已完成里程碑**:Registry God Object 完全拆解(10 子模块提取)+ 18 workspace crates 提取 + MCP Python SDK 1.16.0 兼容修复 + repo.rs trait 化 + flaky 测试根治(RF-2.1/2.2/2.3)+ 许可证迁移 + 
health 性能优化(-44%)+ index skip-embeddings + batch encoding 实验 + RF-6 清零 + 架构治理文档(ADR/不变量清单)+ Tantivy BM25 代码符号搜索(P1)+ AppContext 职责拆分 Phase 1/2(storage.rs 860→430 行)+ 架构不变量 CI(G5/T11/T12)+ Embedding 多后端(Candle/Ollama 配置切换, P3)+ EnvVersionCache 扩展(9 工具链检测, P4)+ **v0.16.0 Agent Contexts(P1/P2/P3)**:`agent_contexts`/`agent_memories`/`context_entity_links` Schema + 9 个 Session MCP tools + Context-aware Skill Runtime(`DEVBASE_ACTIVE_CONTEXT` 注入)+ **v0.16.1 Workflow-Session Binding**:`workflow_executions.context_id` + 执行自动绑定 Active Context + **v0.17.0 Embedding Externalization**:`embedding` 从 default features 移除(Candle/Ollama 降级为 opt-in `llm-backend`)+ Schema 34 向量存储 + `cosine_similarity` SQLite UDF + `devkit_session_recall` / `devkit_session_index`(60 tools)+ **v0.18.0 ClaudeCode Integration**:`devkit_project_brief`(Markdown 项目简报)+ `devkit_impact_analysis`(修改影响范围分析)+ `devkit_session_export` / `devkit_session_import` + `scripts/devbase-claude.ps1` 启动器(自动注入 `.claude/CLAUDE.md`)+ RFC `docs/RFC/claudecode-workflow-integration.md`(64 tools)+ **v0.18.0 发布收尾**:PR 合并 + 双平台二进制构建 + GitHub Release + 根目录治理 + 世界模型战略认知沉淀(Vault + AGENTS 双向联动)+ NotebookLM 生态消化(5 项目注册)+ GreptimeDB 互补分析 + **v0.19.0 知识基础设施硬化**:SQLite WAL 默认启用 + `devkit_index_health`(Beta)+ Vault 导出(`devkit_vault_export`)+ Redis ADR 决策(放弃引入)+ **v0.20.0 知识完备性**:Vault 双向链接 BFS 图遍历(`devkit_vault_graph` 扩展)+ Vault Git-based 历史追踪(`devkit_vault_history`,第 67 个 tool)+ 混合检索质量监控(`devkit_search_quality`,第 68 个 tool,`HybridSearchMetrics`)+ Block 引用支持(`WikiLink.anchor`:`[[note#heading]]` / `[[note#^block-id]]`)+ 性能回归基线(`#[ignore]` 1k/10k 阈值测试)+ 客户端无关原则(Client-Agnostic Principle)落地 + `skill sync` 泛化接口(零硬编码客户端路径) -- **核心方向**:让 Kimi CLI 在调用文件工具之前,先通过 devbase 获得"该读哪些文件、为什么读、它们之间的关系" -- **本质分析**:见 `vault/99-Meta/devbase-essence-analysis-20260430.md` 与 `docs/architecture/redefinition.md` -- **设计文档**: - - [`docs/architecture/workflow-dsl.md`](docs/architecture/workflow-dsl.md) — Workflow DSL 规范 - - 
[`docs/architecture/workspace-as-schema.md`](docs/architecture/workspace-as-schema.md) — 统一实体模型设计 - - [`docs/RFC/agent-memory-vector-storage.md`](docs/RFC/agent-memory-vector-storage.md) — v0.17.0 Agent Memory 向量存储 RFC(Embedding 职责外迁设计) - - [`docs/guides/mcp-integration-guide.md`](docs/guides/mcp-integration-guide.md) — MCP 集成指南 - - [`docs/README.md`](docs/README.md) — 完整文档导航 - -Skill Runtime 全生命周期已落地(含依赖管理 Schema v15),Schema v16 统一实体模型(entities/relations)已落地,Skill 自动封装(`discover`)已落地。 - -- **技术栈**:Rust 2024, SQLite, tokio, ratatui, git2, reqwest, tantivy -- **Registry DB**:`%LOCALAPPDATA%\devbase\registry.db`(轻量索引,用户本地,永不进入版本控制) -- **Workspace**:`%LOCALAPPDATA%\devbase\workspace/` —— 文件系统 = source of truth - - `vault/` —— PARA 结构:00-Inbox, 01-Projects, 02-Areas, 03-Resources, 04-Archives, 99-Meta - - `assets/` —— 二进制资源 -- **MCP Server**:stdio only,**68 个 tools**(含 7 个 vault tools + 8 个代码分析工具 + 5 个 embedding/搜索工具 + 4 个 Skill Runtime tools + 3 个 Workflow/评分 tools + 1 个报告工具 + 1 个 arXiv 工具 + 2 个 KnownLimit tools + 3 个 Relation tools + 11 个 Agent Context tools + 2 个 ClaudeCode 集成工具 + 1 个 streaming index 工具 + 1 个 oplog 工具 + 1 个 Index Health 工具 + 1 个 Search Quality 工具 + 1 个 Evaluate 工具);配置见 `mcp.json` -- **Kimi CLI 集成**:MCP server 已通过 `kimi mcp add` 注册,端到端验证通过(`kimi --print` 成功调用 `devkit_health`);项目级 skill 位于 `.kimi/skills/devbase-project/SKILL.md` -- **统一节点模型**:`core::node::{Node, NodeType, Edge}` —— GitRepo / VaultNote / Asset / ExternalLink -- **当前测试**:451+ lib passed / 0 failed / 5 ignored + 11/11 integration passed(`tests/cli.rs`) -- **编译状态**:0 warning / 0 vulnerabilities(`cargo audit` 干净,除上游 `tokei` 的 `RUSTSEC-2020-0163`) -- **Workspace 结构**:`crates/` 目录已启用,19 个零耦合模块已提取为独立 crate(`devbase-symbol-links`, `devbase-sync-protocol`, `devbase-core-types`, `devbase-syncthing-client`, `devbase-vault-frontmatter`, `devbase-vault-wikilink`, `devbase-workflow-interpolate`, `devbase-workflow-model`, `devbase-registry-health`, `devbase-registry-metrics`, 
`devbase-registry-workspace`, `devbase-embedding`, `devbase-skill-runtime-types`, `devbase-skill-runtime-parser`, `devbase-registry-entity`, `devbase-registry-relation`, `devbase-registry-call-graph`, `devbase-registry-dead-code`, `devbase-registry-code-symbols`) -- **Workflow Engine**:YAML 解析 + 拓扑调度 + batch 并行执行 + 5 种 step 类型(skill/subworkflow/parallel/condition/loop) -- **NLQ 自然语言查询**:TUI `[:]` 触发 embedding 语义搜索,fallback 降级文本搜索 -- **Mind Market 评分**:success_rate / usage_count / rating(0-5),`skill recalc-scores/top/recommend` - -## 关键约定 - -1. **文件操作**:读取用 `ReadFile`,搜索用 `Grep`/`Glob`,修改用 `StrReplaceFile`,整文件重写用 `WriteFile` -2. **Shell**:Windows PowerShell;用 `;` 分隔命令 -3. **Git**:提交前必须通过 `cargo test --all-targets` + `cargo clippy --all-targets -D warnings` + `cargo fmt --check` -4. **Schema 迁移**:`PRAGMA user_version` 安全升级;升级前自动调用 `backup::auto_backup_before_migration()` - -## 安全原则 - -### 本地优先(Local-First) - -- **Registry DB** 始终存储在用户的本地配置目录(`dirs::config_dir()/devbase/`),绝不向远程传输 -- **代码内容** 不会被上传到任何云端服务(除非用户显式配置 GitHub token 用于 stars 查询) -- **MCP Server** 仅通过 stdio 本地进程通信,不暴露网络端口 - -### 客户端无关(Client-Agnostic) - -> devbase 的核心能力(编排、注册、索引、搜索、同步)必须在不依赖任何特定 AI 客户端的前提下独立运行。 - -- ✅ **允许**:向通用目录输出数据,由用户自行分发给任意客户端(如 `skill sync --output-dir ./plans`) -- ✅ **允许**:实现标准协议(MCP)供任意客户端连接 -- ❌ **禁止**:核心能力硬编码特定客户端的路径、API、或配置格式(如 `C:\Users\xxx\.claude`) -- ❌ **禁止**:核心能力的可用性取决于某个客户端是否安装 -- 🟡 **适配层**:`scripts/claude/`、`docs/clients/` 等目录下的客户端适配脚本属于配套示例,不归入核心版本控制 - -### 凭证管理 - -- GitHub token、LLM API key 存储在本地 `config.toml` 中 -- `config.toml` 位于用户配置目录,**不在项目工作目录**,因此不会被意外 `git commit` -- 默认配置模板中的 token 字段使用占位符 ``,避免真实 token 格式泄露 -- `.gitignore` 已覆盖 `*.db`、`.devbase/`、`.env*`、`*.local.toml` - -### 审计与备份 - -- 所有 `scan`/`sync`/`health` 操作自动写入 OpLog(SQLite `oplog` 表) -- Schema 迁移前自动生成 `backup-YYYYMMDD-HHMMSS.db` 快照 -- Registry 支持 `export`/`import` 用于用户自主备份 - -## 许可证策略 - -- **主许可证**: AGPL-3.0-or-later (`LICENSE`) -- **商业授权**: 双许可模式,闭源/专有 SaaS 使用需联系作者 (`LICENSE-COMMERCIAL.md`) -- 
**Cargo.toml**: `license = "AGPL-3.0-or-later"` -- **SPDX 头**: 新增源文件应在顶部包含 AGPL-3.0 声明(见 `LICENSE` 末尾 "How to Apply" 部分) - -## 架构状态(Wave 15b 完成) - -| 维度 | 状态 | -|------|------| -| 代码质量 | `rustfmt.toml` + `cargo fmt` + `clippy -D warnings` 全绿 | -| 模块拆分 | `sync`→5 / `registry`→11 / `mcp` 测试分离 / `search`→hybrid / `oplog_analytics` / `symbol_links` / **workspace: 3 crates extracted** | -| 库/二进制 | `src/lib.rs` 导出全部 **30+** 个模块;`src/main.rs` 仅 CLI 入口 | -| TUI 架构 | `render/` 6 子模块 + `theme.rs` Design Token + `layout.rs` 响应式引擎 | -| 数据层 | Schema v23: `repos`/`vault_notes`/`papers`/`workflows`/`repo_modules_legacy` 表已删除;`entities` 为唯一数据源;`repo_tags/repo_remotes/repo_health/...` 为独立 JOIN 表(无 FK);仅 `skills` 保留独立表(embedding BLOB) | -| CI/CD | `.github/workflows/ci.yml`:check / test / fmt / clippy on Windows | -| 依赖安全 | `cargo audit` 0 漏洞(除上游 `tokei` 的 `RUSTSEC-2020-0163`) | - -## 架构红线(Architecture Guardrails) - -> 基于第一性原理的工程约束。违反任意一条 = HALT,转交人类裁决或回滚。 -> 规则编号 `RF-XX`(Red-line / Fitness function),带客观测量标准,非主观描述。 - -### RF-1: 依赖注入优于全局状态(Global State Anti-Pattern) - -**理论锚定**:全局可变状态使组件隐式耦合,破坏可测试性与可复用性(参考:Pure Function / DI 原则)。 - -**规则**: -- 禁止新增 `dirs::data_local_dir()` / `std::env::var_os` 硬编码路径。 -- 所有 IO 边界路径(DB、索引、备份、配置)必须通过参数、构造函数或 `trait` 注入。 -- **例外(Grandfathered)**:现有 3 处(`backup_dir`、`db_path`、`index_path`)在重构前不得新增第 4 处。 - -**Fitness Function**: -```bash -# 新增 PR 中不得出现新的全局路径硬编码 -grep -rn "dirs::data_local_dir\|std::env::var_os\|std::env::var(\"LOCALAPPDATA\"" src/ \ - | grep -v "backup.rs\|migrate.rs\|search.rs" -# 预期输出:空 -``` - -### RF-2: 测试密封性(Hermetic Testing) - -**理论锚定**:测试失败必须仅因被测代码缺陷,不因外部因素、测试顺序或并行调度(参考:Google Test Blog — Hermetic Servers)。 - -**规则**: -- 所有测试禁止修改全局进程状态(`std::env::set_var`、`static mut`、全局文件系统句柄)。 -- 文件系统测试必须使用 `tempfile` + 注入式路径,禁止直接操作 `%LOCALAPPDATA%` 或 `~/.config`。 -- Tantivy / SQLite 文件系统测试必须获取 `SEARCH_TEST_LOCK`(或同等级串行化机制)。 - -**子规则(来自 PR #4 教训)**: -- **R2.1 禁止 `DEVBASE_DATA_DIR` 全局注入**:并行测试中 `std::env::set_var("DEVBASE_DATA_DIR", ...)` 导致竞态;必须使用 
`TempStorageBackend` 注入式替代。 -- **R2.2 Windows 路径双端规范化**:`TempDir` 可能返回短文件名(`TEMP~1`),而 `dunce::canonicalize` 返回长文件名;路径比较前必须对**双方**调用 `dunce::canonicalize`。 -- **R2.3 `git2` 测试显式身份 + 显式分支**: - - CI runner 无全局 `user.name`/`user.email` → `repo.signature()` 会 panic;必须改用 `git2::Signature::now("Test", "test@example.com")`。 - - `git2::Repository::init` 的默认分支在不同平台可能为 `master` 或 `main`;必须显式 `repo.set_head("refs/heads/main")` 并 commit 到 `"refs/heads/main"`。 - -**Fitness Function**: -```bash -# 高并发下 100% 通过,无 flaky -cargo test --test-threads=16 -``` - -### RF-3: Schema 单一事实来源(Single Source of Truth) - -**理论锚定**:重复信息必然 drift(参考:DRY 原则 + Evolutionary Architecture 的版本一致性约束)。 - -**规则**: -- `SCHEMA_DDL`(`registry/test_helpers.rs`)与 `migrate.rs` 必须原子同步。 -- 新增表、索引、列必须同时出现在两者中;禁止仅更新其一。 - -**Fitness Function**: -- CI 运行 `test_in_memory_schema_version` + schema 结构比对脚本(可手动运行 `cargo test registry::test_helpers::tests` 验证)。 - -### RF-4: 二进制入口限界(Bounded Context) - -**理论锚定**:CLI 入口应仅做命令分发,业务逻辑应在 lib 模块中(参考:Hexagonal Architecture / Ports & Adapters)。 - -**规则**: -- `main.rs` 行数不得超过 **1000 行**。 -- 新增 CLI 命令必须先拆分为 `commands/` 子模块或独立函数,禁止在 `main.rs` 中堆积业务逻辑。 - -**Fitness Function**: -```bash -# 当前 515 行(Phase 1/2/3 已削减 1003 行),远超目标 -[ $(wc -l < src/main.rs) -le 1000 ] || exit 1 -``` - -### RF-5: 无循环依赖(Acyclic Dependencies) - -**理论锚定**:循环依赖破坏模块化,使增量编译和独立复用不可能(参考:John Lakos — Large-Scale C++ Software Design)。 - -**规则**: -- 禁止模块间双向 `use crate::` 引用。 -- 新增模块必须通过脚本验证无循环(当前已满足,未来 PR 保持)。 - -**Fitness Function**: -```bash -# 文件级双向依赖检测(当前输出应为空) -for f in src/**/*.rs; do - name=$(basename "$f" .rs) - refs=$(grep -o 'use crate::[a-z_]*' "$f" | sed 's/use crate:://') - for r in $refs; do - if [ -f "src/$r.rs" ] && grep -q "use crate::$name\b" "src/$r.rs"; then - echo "CYCLE: $name <-> $r" - fi - done -done -``` - -### RF-7: Workspace 拆分约束(Module Distribution Readiness) - -**理论锚定**:模块能否独立发布是耦合健康度的金标准;不能拆分的模块 = 耦合不健康的模块。 - -**规则**: -- 新增模块若对 devbase 内部其他模块的 `crate::` 引用超过 **5 个**,禁止提取为 workspace crate。 -- 
已提取 crate 的重新导出文件(`src/symbol_links.rs` 等)**禁止添加新代码**——顶部有 `RE-EXPORT ONLY` 注释作为守卫。 -- 子 crate 的依赖版本必须与 workspace 统一,禁止独立 bump。 - -**Fitness Function**: -```bash -# 扫描所有 src/*.rs,统计 crate:: 引用数 -for f in src/*.rs; do - count=$(grep -c 'crate::' "$f") - if [ "$count" -gt 15 ]; then - echo "HIGH COUPLING: $f ($count refs)" - fi -done -# 预期输出:空(或仅已标记的高耦合文件如 mcp/tools/repo.rs) -``` - -### RF-6: 生产代码无 panic(Crash-only Software) - -**理论锚定**:Rust 的 `Result` 类型将错误显式化;`unwrap` 是将运行时崩溃隐藏在类型系统背后(参考:Joe Armstrong — Let it crash,但 Rust 中崩溃 = 进程终止,不可接受)。 - -**规则**: -- 生产代码(`src/**/*.rs` 中不在 `#[cfg(test)]` 块内的代码)禁止 `unwrap()`、`expect()`、`panic!()`。 -- 测试代码不受此限,但鼓励使用 `?` 传播。 - -**Fitness Function**: -```bash -# 生产代码 unwrap 计数(排除 #[cfg(test)] 块及 tests.rs 文件) -for f in $(find src -name "*.rs"); do - test_line=$(grep -n "#\[cfg(test)\]" "$f" | head -1 | cut -d: -f1) - if echo "$f" | grep -qE "tests?\.rs$|_test\.rs$|/tests/"; then continue; fi - if [ -n "$test_line" ]; then - head -n "$((test_line - 1))" "$f" | grep -n "\.unwrap()" - else - grep -n "\.unwrap()" "$f" - fi -done -# 预期输出:空 -``` - -**状态**:🟢 **已完成**(v0.20.1 复核:生产代码 unwrap = 0;此前 1090 为测试模块误统计)。 - -### 架构治理框架(Architecture Governance) - -> 参考:外部架构治理方法论(Kimi 会话 `e9f2965f-b949-46a5-9d7c-afd6d4d9232c`) - -**已制度化实践**: - -| 实践 | devbase 落地形式 | 文档位置 | -|------|-----------------|---------| -| ADR(架构决策记录) | ADR-001(单 crate defer)、ADR-002(batch encoding 回滚) | [`docs/architecture/adr-template.md`](docs/architecture/adr-template.md) | -| 不变量清单(Invariants) | RF-1~RF-7 + 分层模块约束(T01–T12) | [`docs/architecture/invariants.md`](docs/architecture/invariants.md) | -| 模块提取演习 | RF-7 的 5 个 `crate::` 引用阈值 + 已提取 18 workspace crates | 本文件 §RF-7 | -| 三层摘要 | `crates/*/README.md` 要求:一句话 + 一页纸 + 深度链接 | 各 crate README | -| 定期架构回顾 | 每次 Wave 结束时的架构审计(见 `docs/_audit/`) | `docs/_audit/2026-04-26-*.md` | - -**待增强**: -- 三层摘要:部分已提取 crate 的 README 尚未达到"一页纸"标准 -- 定期架构回顾:当前按 Wave(功能迭代周期)触发,建议每 2–4 周增加一次纯架构 review(不看 feature 进度,只看不变量违反和隐式依赖) - ---- - -## 
技术债登记簿(Technical Debt Ledger) - -> 已识别的架构债,按严重程度排序。清偿前不得新增同类债务。 - -| 债项 | 严重 | 当前值 | 目标阈值 | 清理路径 | 引入 Wave | -|---|---|---|---|---|---| -| `main.rs` 上帝文件 | 🟢 | 778 行 | ≤1000 行 | 拆分为 `commands/simple.rs` + `commands/skill.rs` + `commands/workflow.rs` + `commands/limit.rs`;全部 22 个命令/子命令树已迁移 | ≤15 | -| Workspace crate 版本号混乱 | 🟢 | **已完成**:全部 19 个 crate 统一为 `version.workspace = true`,workspace 版本 `0.20.0` | 全部统一为 `version.workspace = true` 或 `0.20.0` | 批量修正 `Cargo.toml` | v0.20.1 | -| RF-7 高耦合模块超标 | 🟢 | **已完成**:`scan.rs` 18→7、`digest.rs` 17→5、`registry.rs` 20→4;全部 `src/*.rs` ≤ 15 `crate::` 引用 | ≤15 | use 语句规范化消除 self-reference;`registry.rs` 保留 4 个(2 use + 2 dependency_graph)作为 facade | v0.20.1 | -| `init_db()` 全局路径 | 🟢 | `AppContext` 已集成到全部 commands/ 模块;`main()` 通过 `AppContext` 分发配置;`init_db()` 无外部调用 | 0 | 已完成:`StorageBackend` trait + `AppContext` 全面替代;`db_path`/`workspace_dir`/`index_path`/`backup_dir` 已统一 | ≤15 | -| Tantivy+SQLite 双写一致性 | 🟡 | 无事务协调;**已添加反向检测**(`repair_tantivy_consistency` 现在检测 SQLite→Tantivy 缺失) | 补偿机制 | 长期:事务协调或 SQLite FTS5 替代;短期:反向检测 + 日志已落地(`fe14c81`) | 7 | -| 主从表切换 | 🟢 | Phase 1 全部完成:`repos` 表已删除,entities 为唯一数据源 | `entities` 为第一公民 | Phase 2 类型系统开放(新增 entity_type 无需改表结构) | v0.12.0 | -| vault/paper/workflow entities 缺口 | 🟢 | Stage C+D+E 全部完成:`vault_notes`/`papers`/`workflows` 表已删除,`skills` 保留(embedding BLOB) | 0 缺口 | — | v0.12.0 | -| scan 路径排除 | 🟢 | `discover_repos` + `collect_tasks` 均支持 `scan.exclude_paths`;scan 和 sync 双阶段过滤 | 0 缺口 | 排除路径使用 `Path::starts_with` 组件级匹配,避免字符串前缀误杀;相对路径在 sync 场景(无 root)下被忽略 | v0.12.0 | -| tree-sitter 编译成本 | 🟢 | ~15-20s grammar C compilation | 可控 | 已完成 feature-gate:`lang-rust`/`lang-python`/`lang-js-ts`/`lang-go` 四个 feature,默认全启,可选关闭减少编译;`--no-default-features` 编译通过 | 8 | -| Feature flags 缺失 | 🟢 | 4 个可选 feature (tui, watch, mcp, embedding) | ≥3 | 已完成:`tui`/`watch`/`mcp`/`embedding` 均为 optional;`--no-default-features` 编译通过 | ≤15 | -| Vault 无版本历史 | 🟢 | `devkit_vault_history` + git2 revwalk + blob diff 行级统计 | 历史可回溯 | 用户侧将 
vault 目录作为 Git 子模块管理 | v0.20.0 | -| `LOCALAPPDATA` 测试模式残留 | 🟢 | 0 处 | 0 | 全面废弃 `LOCALAPPDATA` 环境变量覆盖,统一为 `DEVBASE_DATA_DIR`;mcp/tests.rs 修复 cleanup 逻辑(remove_var 目标从 LOCALAPPDATA 修正为 DEVBASE_DATA_DIR) | 47 | -| 单体职责膨胀(代码智能+知识库+仓库管理+工作流+Skill+Syncthing) | 🟡 | 6 个核心领域耦合于单一二进制(31MB);`workflow`/`skill` 与 Claude Code Agent 能力重叠 | 按领域拆分为 `devbase-core`(代码+vault)+ `devbase-sync`(仓库管理)+ `devbase-bridge`(Syncthing);冻结 workflow/skill 新增 | 外部审查 2026-05-11 | - -**清偿原则**: -1. 禁止在清偿现有 🔴 债务前新增同类别债务。 -2. 每个债务必须关联至少一个 `TODO(#)` 或 `FIXME` 代码注释。 -3. 每季度(90 天)由 MODE-O 审查一次,更新当前值与优先级。 - ---- - -## 历史 Waves - -| Wave | 主题 | 关键产出 | Commit | -|------|------|---------|--------| -| 42-44 | 测试基础设施 | 22 个 smoke tests, CLI 集成测试层 (`tests/cli.rs`), Criterion 基准测试 (`benches/registry_bench.rs`) | — | -| 45-47 | Tier 1 测试收尾 | 28 个 smoke tests 覆盖 embeddings/semantic_search/cross-repo/search/workflow/backup/registry;`SCHEMA_DDL` 补录 4 表;`init_db()` 并发安全 (`BEGIN EXCLUSIVE`);测试数据隔离统一为 `DEVBASE_DATA_DIR` | — | -| 1 | 代码质量 | `rustfmt.toml`, clippy 清零 | `4efcd58` | -| 2 | 模块拆分 | `sync/`, `registry/`, `mcp/tests.rs` | `4efcd58` | -| 3 | 工程化 | `src/lib.rs`, CI workflow, `main.rs` 简化 | `4efcd58` | -| 4 | 依赖/审计 | `notify` 8.2.0, `tokei` 14.0.0 | `4efcd58` | -| 5 | TUI 美学与工程学 | 主题系统, Tabs, Help Overlay, Render 拆分 | `6b9be88` | -| 6 | 数据层深度能力 (MVP) | 语义索引、调用图、依赖图、死代码检测、Python 依赖解析 | `9fbf7c4` | -| 7 | 向量语义搜索 | `embedding.rs`, `code_embeddings` 表, `devkit_semantic_search` | `4d400b1` | -| 8 | 多语言符号提取 | tree-sitter-python/typescript/go, Rust/Python/JS/Go 符号 + Call Graph | `4f4911b` | -| 9 | scan panic 修复 + arXiv/CMake | `block_on_async` 安全封装, arXiv API 元数据, CMakeLists.txt 依赖解析 | `881cd32` | -| 10 | OpLog 结构化 | Schema v12, OplogEventType 枚举, JSON metadata, duration_ms | `7aa2a65` | -| 11 | 性能基准 | criterion benches: index_repo_full, cosine_similarity, extract_symbols, CMake | `8e0f236` | -| 12 | 混合检索核心 | `search::hybrid.rs`: RRF 归并, keyword_search, hybrid_search_symbols | `7fca714` | -| 13 | 外部 Embedding Provider | 
Python CLI `examples/embedding-provider/`, Ollama 批量生成, 字节兼容序列化 | `574fb96` | -| 14a | 跨 repo 语义聚合 | `cross_repo_search_symbols()` INTERSECT tag 过滤, `devkit_cross_repo_search` | `8e762c7` | -| 14b | 知识覆盖报告 | `oplog_analytics.rs`: 表存在性容错, 覆盖度/健康度/活动流, `devkit_knowledge_report` | `869bcbf` | -| 15a | 显式知识链接 | Schema v13 `code_symbol_links`, Jaccard 签名相似度, 同文件聚类, `devkit_related_symbols` | `d462209` | -| 15b | 混合检索 MCP Tool | `devkit_hybrid_search`: 向量+RRF+关键词自动降级, 推荐默认搜索入口 | `6df6106` | -| 16a | Skill Runtime Schema | `skills` + `skill_executions` 表, SKILL.md 解析器, Registry CRUD, 3 内置 skills | `e41eccb` | -| 16b | Skill 发现与搜索 | 文本搜索 + 语义搜索 (`--semantic`), skill embedding 生成脚本 | `48b96c6` | -| 17 | Skill 执行引擎 | Process-based executor, interpreter 自动解析, timeout, stdout/stderr 捕获, 执行审计 | `99d818e` | -| 18 | MCP Skill 集成 | `devkit_skill_list` / `devkit_skill_search` / `devkit_skill_run` 3 个 tools | `c80fdec` | -| 19a | Skill 生态(安装/发布) | `install_skill_from_git` (git2 clone), `publish` (validate + git tag + push remote) | `8120e4d` | -| 19b | Skill 生态(同步/TUI) | `sync --target clarity` (导出为 Clarity plan JSON), TUI Skill Panel (`k` keybinding) | `678c70c` | -| 20 | Skill 依赖管理 | Schema v15 `dependencies` 列,Kahn 拓扑排序,DFS 环检测,自动安装缺失依赖,`install`/`run`/`validate` 集成 | `75fed3c` | -| 21 | 统一实体模型 + 自动封装 | Schema v16 `entities/entity_types/relations`,渐进双写;`discover` 命令(Rust/Node/Python/Go/Docker/Generic 检测 + SKILL.md 自动生成 + entry_script 包装器);分类推断(ai/dev/data/infra/communication) | — | -| 22 | AppContext Pool 化 | `r2d2::Pool` 替代单 Connection;22 个 commands/TUI/MCP 全链路迁移;`init_db()` 89→5 处;MCP 测试临时目录隔离;search 多线程竞态自愈 | — | -| 23 | Registry God Object 拆解 | 提取 10 子模块(repo/vault/workspace/health/metrics/links/known_limits/knowledge_meta/knowledge)为 free-function 模块;`WorkspaceRegistry` 退化为向后兼容门面;~150 处调用点迁移;0 测试回归 | `dfc43d4` | - -## 敏感文件清单(禁止提交) - -| 文件/模式 | 原因 | .gitignore 覆盖 | -|-----------|------|----------------| -| `*.db` | SQLite 数据库含用户仓库元数据 | ✅ | -| `.devbase/` | 本地 sync 标记和工作区状态 | 
✅ | -| `*.log` | 可能含路径或错误堆栈信息 | ✅ | -| `.env*` | 环境变量和 secrets | ✅ | -| `*.local.toml` | 本地覆盖配置 | ✅ | -| `target/` | 构建产物 | ✅ | - -## 跨项目接口 - -- **clarity-core**:已解除路径依赖。devbase 不再被 clarity-core 调用,LLM 能力内联为纯 reqwest -- **syncthing-rust**:`.syncdone` 标记格式已对齐 - -## 架构讨论摘要(来自 2026-04-24 会话) - -以下为本项目相关的粗粒度架构决策与待探索方向。 - -### 1. 自指知识库:五层知识模型 - -devbase 作为知识库存储层,需支持 L0-L4 五层索引: - -| 层级 | 内容 | 生长信号 | 遗忘机制 | -|------|------|---------|---------| -| L0 对象 | 外部知识块(代码、文档、日志) | 检索频率、引用次数 | 版本冻结 | -| L1 方法 | 操作知识的方法(检索/分块/向量化) | 检索成功率、延迟分布 | A/B 测试 | -| L2 哲学 | 设计原则(本地优先、奥卡姆剃刀) | 架构决策事后验证 | 外部论文扰动 | -| L3 风险 | 系统弱点图谱 | 故障事件、异常日志 | 红队攻击 | -| L4 元认知 | 关于 L1-L3 的元知识 | 人类纠正、跨会话一致性 | 形式化验证 | - -**决策**:粗粒度与细粒度知识保留独立索引;细粒度存 SQLite(快速查询),粗粒度存 Vector DB(语义检索)。 - -### 2. 审计日志(OpLog) - -- P3 不可靠交付的使用追踪写入 OpLog,实现事后追溯 -- 边界图谱版本历史、探索任务结果写入 OpLog -- 所有验证消息(请求+响应+共识)写入 OpLog - -### 3. 外部资源调度器 - -devbase 承载外部资源调度的抽象接口: - -- **形式化工具**:TLA+/Coq/Lean(本地路径或远程地址) -- **人类专家**:异步审批,不阻塞夜间批处理 -- **P2P 节点**:复用 syncthing-rust 的 Device ID 与传输层 -- **文献检索**:arXiv / Semantic Scholar API - -**决策**:定义资源请求的抽象接口与排队策略;具体调度算法不进当前 scope。 - -### 4. 边界图谱存储 - -- `BoundaryMap` 存储已知限制(KnownLimit)的版本历史 -- `ExplorationTask` 队列记录边界外待探索任务 -- 跨实例同步:通过 syncthing-rust P2P 网络同步边界快照 - -### 5. 
安全计算(MPC/TEE) - -- 当前四个项目中无密码学层归属 -- **短期**:devbase MCP 接口可封装外部 TEE 服务(如 Azure Confidential Computing) -- **长期**:如需自建,新建 `clarity-tee` 或 `devbase-secure` 子项目 - -## 当前阶段待办(v0.15.0 推进中) - -v0.11.3 已交付(tagged)。v0.12.0-alpha 全部功能已完成,进入发布治理阶段。 - -| 方向 | 状态 | 说明 | -|------|------|------| -| `init_db()` → `AppContext` 迁移 | 🟢 | Pool 化完成,`init_db()` 从 89 处降至 5 处合法保留,全部 commands/TUI/MCP 已接入 | -| Tantivy+SQLite 双写一致性 | 🟡 | 无事务协调,需补偿机制或 FTS5 替代评估 | -| tree-sitter 编译成本 | 🟡 | ~15-20s,评估 ccache 或 grammar 预编译 | -| Feature flags 扩展 | 🟡 | 2/3(tui, watch),mcp 等模块待评估 | - ---- - -## 历史完成记录(v0.4.0 – v0.10.0) - -### 阶段二任务(v0.4.0 AI Skill 编排基础设施) - -| 波次 | 任务 | 状态 | 交付物 | -|------|------|------|--------| -| Wave 21 | Schema v16 + 自动封装 | ✅ 已完成 | `entity_types/entities/relations` + `devbase skill discover` | -| Wave 22 | discover 硬化 | ✅ 已完成 | `--install` 真正注册 + Git URL 直接克隆封装 | -| Wave 23 | Workflow 预留 | ✅ 规范已完成 | `docs/architecture/workflow-dsl.md` | -| Wave 24 | Workflow Engine v0.5.0 | ✅ 已完成 | YAML 解析 + 拓扑调度 + batch 并行执行 + 5 step 类型 | -| Wave 25 | TUI Workflow 可执行 | ✅ 已完成 | `[w]` 详情页 `r/Enter` 运行 + 结果弹窗 | -| Wave 26 | NLQ 自然语言查询 v0.7.0 | ✅ 已完成 | `[:]` 触发 embedding 语义搜索 + fallback 降级 | -| Wave 27 | Mind Market 评分 v0.6.0 | ✅ 已完成 | `success_rate`/`usage_count`/`rating` + `recalc-scores`/`top`/`recommend` | -| Wave 28 | 7 个风险点修复 v0.7.1 | ✅ 已完成 | EnvGuard、NLQ fallback、StepType 显式标签、跨平台解释器探测 | -| Wave 29 | Workflow 子类型执行 v0.8.0 | ✅ 已完成 | Subworkflow 递归 + Parallel 聚合 + Condition 表达式求值 | -| Wave 30 | 生产代码 unwrap 清零 | ✅ 已完成 | 29 个生产代码 unwrap → 0,`cargo clippy -D warnings` 全绿 | -| Wave 31 | NLQ 结果可执行 v0.8.1 | ✅ 已完成 | `[:]` 搜索结果按 Enter 直接运行 skill,event+state+render 三文件修改 | -| Wave 32 | NLQ smoke test | ✅ 已完成 | `run_nlp_selected_skill` 空列表/无技能/执行管道测试,267 tests passed | -| Wave 33 | TUI SkillPanel 拆分 | ✅ 已完成 | 7 个 skill 字段提取到 `SkillPanelState`,App 51→44 字段 | -| Wave 34 | Workflow Loop Step 硬化 | ✅ 已完成 | `StepType::Loop { body }` + `execute_loop_step` + `${loop.item}` / `${loop.index}` | -| Wave 35 | 
L3 Risk Layer MVP | ✅ 已完成 | Schema v18 `known_limits` + Registry CRUD + MCP tools + CLI `limit` + OpLog 集成 | -| Wave 36 | L4 元认知层 MVP | ✅ 已完成 | Schema v19 `knowledge_meta` + Registry CRUD + `--reason` resolve + L3-L4 联动 | -| Wave 37 | Hard Veto 运行时守卫 | ✅ 已完成 | `skill_runtime::executor` 执行前检查未解决 hard veto,警告注入 stderr + OpLog 审计 | - -### 明确不做(已排除) - -- SSE transport(stdio 已覆盖主流 Client) -- `.devbase` 目录规范(无外部采纳者) -- MCP 协议扩展提案(Star = 0,不会被采纳) -- 商业化 / 付费版 -- ~~拆分 crate(50+ tools 后再评估)~~ → **重新评估**:已触发外部架构审查(§九 耦合检查,6 领域耦合),`workflow`/`skill` 与 Claude Code Agent 重叠,v0.16.0 需输出拆分方案(`devbase-core` / `devbase-sync` / `devbase-bridge`) - -### Future / Icebox(无排期) - -1. ~~输出 L0-L4 五层知识的 TOML/JSON Schema 草案~~(保持开放,非阻塞) -2. ~~输出 OpLog 审计事件类型清单~~(已有基础枚举,保持增量) -3. ~~输出外部资源调度的请求格式草案~~(保持开放) -4. **不做**:调度算法、边界图谱引擎、哲学规则库内容、密码学协议 - -### Post-Wave 19 triage 结论(2026-04-25) - -| 优先级 | 事项 | 状态 | -|--------|------|------| -| P1 | SSE 传输状态与 README 一致性 | ✅ 已完成 — README 修正为 "stdio only; SSE in development",见 commit `935dd61` | -| P2 | 架构预拆分评估 | ✅ 已完成 — 评估报告位于 `docs/architecture/pre-split-evaluation.md`,结论:22.7 KLOC 单 crate 仍最优, defer 至 50+ tools 或编译 > 60s | -| P3 | 竞品定位标语 | ✅ 已完成 — README 顶部标语更新为 "AI 无法识别你的 GUI,devbase 是它的眼镜。" | -| P4 | 开发者 onboarding 文档 | ✅ 已完成 — `CONTRIBUTING.md` + README Contributing 章节(devbase + clarity) | - -- **Tag**: `v0.10.0` 已打标(最新);`v0.2.4` 及之前标签见 Git history -- **Roadmap**: `docs/ROADMAP.md` 为唯一活跃主路线图 - -## Embedding 策略长期规划(已决策) - -**方向**:混合方案 — 模型向量语义搜索 + tantivy BM25 降级 - -| 层级 | 触发条件 | 技术方案 | 状态 | -|------|----------|----------|------| -| L1 向量语义 | `code_embeddings` 表有数据 | Ollama/OpenAI-compatible 生成 768-dim embedding,余弦相似度 Top-K | 已实现,待激活(需 Ollama 运行) | -| L2 全文搜索 | `code_embeddings` 为空或服务不可用时 | tantivy 索引代码符号(function name + signature + doc comment),BM25 评分 | 基础设施就绪,待接入 `semantic_search_symbols` | -| L3 纯符号匹配 | 查询为精确标识符 | SQLite `LIKE '%name%'` 快速匹配 | 已有 | - -**关键决策**:不绑定 Ollama 为唯一 provider。未来可能替换 embedding 生成层为: -- 本地 C++ 推理引擎(如 llama.cpp / onnxruntime) 
-- 纯 Rust 推理引擎(如 rust-bert / candle) -- 外部 MCP / Skill 封装(embedding 作为独立服务) - -**Embedding 状态**: -- `code_embeddings`: **56,722** 行(37.0% 覆盖率),覆盖 10 个仓库 -- `skills.embedding`: 3 个 builtin skill 已有 384-dim 向量 -- 生成工具:`examples/embedding-provider/skills.py`(sentence-transformers `all-MiniLM-L6-v2`) -- 激活路径:启动 Ollama + `devbase index ` 生成 embedding,或配置远程 provider 于 `config.toml [embedding]` 段 - -### 2026-05-04 索引性能实验记录 - -**发现**:Candle CPU BERT `batch_size=32` forward 比 `rayon` 并行单条慢 **5.2×**(88s vs 16s)。 -- 根因:Candle CPU matmul 对大 padded batch 不友好;batch 内序列长度差异导致大量无效 padding token 计算。 -- **决策**:`generate_and_save_embeddings` 回滚到 `rayon::par_iter()` 单条编码;保留 `EmbeddingProvider::encode_batch` trait 方法供未来 GPU/ONNX provider。 -- **新增**:`devbase index --skip-embeddings` 跳过 embedding 生成,纯符号/调用图索引从 ~16s 降至 ~250ms。 - -**外部参考**:知识蒸馏 Pipeline 设计规格(六阶段:噪声过滤→语义分割→主题聚类→层级展开→可信度标注→结构化输出),来源见 `docs/_audit/2026-04-26-embedding-research.md` §2026-05-04 补充。该规格提出通过 devbase MCP 暴露 `devkit_knowledge_distill` 工具,与 Vault 系统形成输入-处理-输出闭环。状态:设计规格级,待验证后评估集成优先级。 - -## 上下文安全机制(Context Safety Mechanism) - -> 长期架构原则:在多 Agent / 子代理协作场景下,保证工作区状态的一致性与可恢复性。 - -### 1. 子代理执行隔离 - -**教训**(2026-04-25 实际发生):多个子代理在同一 Git 工作目录并行执行 `git checkout`/`git commit` 会导致严重的分支混乱。`agent-publish` 和 `agent-tui` 的修改互相覆盖,最终 commit 被错误地放置到对方分支, stash 中混入了不相关的代码。 - -**规则**: -- **串行优先**:多个子代理任务必须串行执行,每次 commit 后切回 main 再启动下一个 -- **目录隔离**:若必须并行,每个子代理在独立的 `git clone` 临时目录工作,完成后由主会话 cherry-pick -- **禁止共享工作目录**:多个 Agent 绝不能同时操作同一个 `.git` 目录 -- **编译检查**:任何子代理返回前必须通过 `cargo test --lib`,否则标记为脏状态 - -### 2. MCP 工具幂等性 - -**原则**:所有通过 MCP 暴露的状态变更操作必须是幂等的。 - -**实现**: -- `save_embeddings` — `ON CONFLICT(repo_id, symbol_name) DO UPDATE` -- `save_symbol_links` — `ON CONFLICT(source_repo, source_symbol, target_repo, target_symbol, link_type) DO NOTHING` -- `index_repo` — 先 `DELETE` 旧数据再 `INSERT`(而非追加) -- 所有批量操作包裹在 SQLite transaction 中 - -### 3. 
状态变更审计追踪 - -**原则**:任何对 registry 的写入都必须留下不可变的审计痕迹。 - -**实现**: -- OpLog Schema v12+:`event_type` 枚举 + JSON metadata + `duration_ms` -- 所有 `scan`/`sync`/`health`/`index` 操作自动记录 -- Schema 迁移前自动生成 `backup-YYYYMMDD-HHMMSS.db` 快照 -- `registry export --format json` 支持用户自主备份 - -### 4. 知识库一致性契约 - -**原则**:存储层(devbase)与计算层(Clarity/Skill)之间的接口契约必须显式、可版本化。 - -**当前契约**: -| 方向 | 接口 | 版本 | -|------|------|------| -| 外部 → devbase | `devkit_embedding_store(repo_id, symbol_name, embedding[])` | v1 | -| devbase → 外部 | `devkit_hybrid_search(repo_id, query_text, query_embedding?, limit)` | v1 | -| devbase → 外部 | `devkit_knowledge_report(repo_id?, activity_limit)` | v1 | - -**变更规则**:MCP tool schema 的 breaking change 必须通过新增 tool(如 `devkit_hybrid_search_v2`)而非修改现有 tool。 - -### 5. Sync Fail-Safe Defaults(Managed-Gate) - -**原则**:危险操作(修改本地 Git 分支)必须默认拒绝,仅对显式授权仓库生效。 - -**实现**(v0.12.0-alpha+): -- `scan --register` 不再自动分配 `"discovered"` 标签;新注册仓库标签为空。 -- 默认 `devbase sync`(无 `--filter-tags`)仅操作带有**管理标签**的仓库: - `mirror`, `reference`, `third-party`, `collaborative`, `team`, - `own-project`, `tool`, `active`, `managed`。 -- 未管理仓库仍被追踪(`health`/`index`/`query` 不受影响),但 `sync` 会跳过它们并提示: - `ℹ️ N registered repositories are not managed. Use 'devbase tag managed' to enable sync.` -- `--filter-tags` 显式过滤时绕过管理门控,允许用户按需选择任意标签组合。 -- 现有 DB 中带有旧 `"discovered"` 标签的仓库自动变为未管理状态(无需迁移,即安全修复)。 - -**用户操作路径**: -1. `devbase scan --register` → 仓库入库,标签为空。 -2. `devbase tag managed`(或 `mirror` / `active` 等)→ 授权该仓库进入自动同步池。 -3. `devbase sync` → 仅对已授权仓库执行同步。 - ---- - -## 上下文压缩缓解(Kimi CLI / LLM Agent 会话恢复) - -> Kimi CLI 等 Agent 环境存在上下文窗口限制,长会话会被压缩/截断。以下措施确保压缩后新 Agent 能独立恢复工作上下文。 - -### 1. 文件自描述原则 - -所有 `src/` 根目录模块和 `crates/*/src/lib.rs` 必须在文件顶部包含: -- **一句话职责**:这个文件解决什么问题 -- **边界说明**:它依赖谁、谁依赖它(或明确声明"零内部依赖") -- **关键决策注释**:任何非直观的设计选择必须有 `// NOTE:` 或 `// DECISION:` 注释 - -### 2. 
状态锚点文件(Context Anchor Files) - -以下文件必须保持最新,作为会话压缩后的恢复点: - -| 文件 | 用途 | 更新触发条件 | -|------|------|-------------| -| `docs/ai-protocol.md` | 架构快照、待办、耦合地图 | 每次架构变更后 | -| `AGENTS.md` | 环境指引、红线规则、历史决策 | 每次红线变更后 | -| `Cargo.toml` workspace 声明 | 模块结构、crate 列表 | 每次提取/新增 crate | -| 各 `crates/*/Cargo.toml` | 独立 crate 的依赖与版本 | 每次依赖变更 | - -### 3. 反模式:禁止在代码中埋藏隐式上下文 - -- ❌ `// 之前讨论过这个方案` — 压缩后无法追溯 -- ❌ `// 见上文的 TODO` — "上文"已丢失 -- ✅ `// DECISION(2026-05-01): 使用 rusqlite::Connection 而非 r2d2 Pool,因为单个写入线程不需要连接池` — 自包含 - -### 4. 会话交接模板 - -当检测到上下文可能被压缩时,Agent 应在恢复后执行以下自检: - -```bash -# 1. 确认当前架构状态 -cargo test --workspace 2>&1 | grep "test result" -# 2. 确认 workspace 成员 -cargo metadata --format-version 1 | jq '.workspace_members' -# 3. 确认耦合地图(快速扫描高耦合模块) -grep -rc 'crate::' src/*.rs | sort -t: -k2 -n | tail -5 -``` - -## 知识库生产级缺口与补齐路线(Knowledge Base Production Gap) - -> 该章节记录 devbase 作为知识基础设施与生产级要求之间的真实差距,以及消除"玩具感"的补齐路径。 -> **核心原则**:devbase 首先是一个可靠的本地知识基础设施,然后才是一个 World Model Compiler。AI 层是编译器的输出接口,但如果存储层不可靠,AI 就是沙上建塔。 - -### 缺口诊断(与生产级知识库对比) - -| 能力维度 | 当前现状 | 生产级要求 | 缺口等级 | -|:---|:---|:---|:---:| -| **存储可靠性** | SQLite 单文件;Schema 迁移前自动快照 | WAL 并发模式、增量备份、索引损坏自动检测重建、点对点恢复 | 🔴 **严重** | -| **检索质量** | BM25 + 768-dim `cosine_similarity` SQL UDF | Hybrid RRF 调优、Re-rank、多路召回、查询延迟可观测 | 🔴 **严重** | -| **知识图谱** | `relation_store/query` 简单三元组 | 双向链接图遍历、Transitive Closure、社区发现、本体约束 | 🟠 **显著** | -| **版本历史** | 代码有 Git;Vault 笔记无版本 | 笔记块级历史、分支、冲突合并策略 | 🟠 **显著** | -| **规模化** | 单机 Rayon;未验证 >100 仓库 / >10k 文档场景 | 索引分片、增量更新、查询缓存、内存上限保护 | 🟠 **显著** | -| **互操作性** | Vault 读写 Markdown | Obsidian 兼容(frontmatter/wikilink)、标准导入导出、避免 Vendor Lock-in | 🟡 **中等** | -| **多模态** | 文本为主 | PDF 解析、图片 OCR、音频转录 | 🟡 **可延期** | -| **协作** | 单用户 + Syncthing 文件级同步 | 冲突解决(CRDT/OT 或至少 last-write-win)、多设备状态一致性 | 🟠 **显著** | - -### 补齐路线图 - -#### 🔴 v0.19.0:存储可靠性加固(消除"玩具感"的最快路径) - -| 任务 | 优先级 | 验收标准 | -|:---|:---:|:---| -| SQLite WAL 模式默认启用 | P1 | 并发写入无锁定冲突;`PRAGMA journal_mode=WAL` 持久化 | -| Tantivy 索引健康检查 `devkit_index_health` | P1 | 
检测索引损坏、版本不匹配、孤儿文档;返回健康评分 0-100 | -| 自动重建策略 | P1 | 索引损坏时自动 fallback 全量重建,而非静默失败;重建过程写入 OpLog | -| 查询性能基线测试 | P1 | CI 中测试 1k/10k/100k 文档量级的检索延迟;建立性能回归红线 | -| Vault 批量导出(Markdown + frontmatter) | P2 | `devkit_vault_export` 支持 PARA 结构完整导出;消除 Vendor Lock-in 焦虑 | -| Redis 缓存评估 | P2 | 完成 Session/向量缓存需求分析;决策:引入 / 自建 / 放弃 | - -#### 🟠 v0.20.0:知识完备性(从"能存"到"好用") - -| 任务 | 优先级 | 验收标准 | -|:---|:---:|:---| -| Vault 双向链接图遍历 | P1 | `vault_backlinks` 升级为图查询:最短路径、共同引用、引用频次 | -| 笔记变更追踪 | P1 | Vault 笔记历史基于 Git 追踪(vault 目录作为 Git 子模块)或 SQLite 增量历史表 | -| 混合检索质量监控 | P1 | RRF 参数可调(`k`、`weights`)、召回率/精确率指标、`devkit_search_quality` 工具 | -| 笔记块级引用 `[[note#block]]` | P2 | 从文档级粒度下沉到块级;支持标题块、列表块、代码块引用 | -| middleware.ts 错误修复 | P2 | 解决已知未解决错误,见技术债登记簿 | - -#### 🟡 v0.21.0+:外部能力嫁接(不重复造轮子) - -| 任务 | 来源 | 集成方式 | -|:---|:---|:---| -| 多说话人播客/测验生成管道 | Open Notebook | 提取生成模块作为外部 MCP Tool,devbase 提供文档输入 | -| Agent 协作与多 LLM 路由 | SurfSense | 参考 Agent 架构,融入 Clarity 三角色世界模型 | -| 时序观测基础设施 | GreptimeDB | Standalone 模式起步,替代 Prometheus,监控索引和查询健康度 | -| 向量索引统一(远期) | GreptimeDB v1.1 | 评估替代 Tantivy+SQLite 双写架构的可行性 | - -### 技术债关联更新 - -| 债项 | 严重 | 状态变更 | 清理路径 | -|:---|:---:|:---|:---| -| Tantivy+SQLite 双写一致性 | 🟡 | **从长期降级至 v0.19.0 P1** | WAL + 补偿机制 + `devkit_index_health` | -| SQLite 单文件并发 | 🔴 | **新增** | v0.19.0 WAL 模式启用 | -| 查询性能不可观测 | 🔴 | **新增** | v0.19.0 CI 性能基线 + OpLog 延迟指标 | -| Vault 无版本历史 | 🟠 | **新增** | v0.20.0 Git 追踪或增量表 | - -### 决策约束 - -1. **v0.19.0 禁止新增非可靠性相关的 MCP Tool**。所有新增 Tool 必须与存储健康、可观测性、或索引修复直接相关。 -2. **v0.19.0 禁止引入外部数据库依赖**(包括 GreptimeDB、Redis、PostgreSQL)。可靠性加固必须在现有 SQLite + Tantivy 技术栈内完成。 -3. 
**世界模型研究(Spark/Flink/时序图神经网络)保持独立仓库**,主仓库继续执行"不得引入 Spark/Flink 依赖"红线。 - ---- - -## 架构演进方向:世界模型战略(World Model Strategy) - -> 该章节记录 devbase 从"静态情境编译器"向"动态世界模型"演进的战略认知。 -> 完整推导见 `vault/research/world-model-spark-flink-strategy.md`,精简认知见 `vault/ideas/world-model-cognition-card.md`。 - -### 核心认知 - -devbase 的终极壁垒不是"管理仓库的工具",而是**把静态代码库编译成 AI 可推理的动态世界模型**。 - -当前 devbase 是**静态世界模型编译器**——能把代码库的"当前快照"编译成 AI 可读的符号表征(调用图、知识图谱、Agent Memory),但不具备**时间维度**和**因果维度**的建模能力。 - -### 三层缺口分析 - -| 层级 | 当前能力 | 缺口 | 研究价值 | -|:---|:---|:---|:---:| -| **感知层** | AST、Git 状态、Vault 索引 | 时序演化感知、群体协作行为 | 中 | -| **世界模型层** | 调用图、知识图谱、向量空间 | 动态转移预测、因果结构、反事实推演 | **高** | -| **策略应对层** | 预设 Workflow 规则 | 自动规划、风险预测、基于模型的决策 | **高** | - -### 关键决策原则 - -1. **产品核心**:坚持 Local-first、Rust-native、zero ML runtime。世界模型训练可在云端,**推理必须下沉到本地**。 -2. **技术选型**:Spark/Flink 是可替换的数据工程管道,不是竞争壁垒。 -3. **差异化**:静态→动态的世界模型升级,是学术+工程的双重壁垒。 - -### Spark/Flink 定位 - -从世界模型视角,Spark/Flink 仅处于**数据工程层**: -- **Spark**:批量构建全局代码演化图谱、分布式因果发现(变量 > 10k 时有用) -- **Flink**:实时事件处理、多开发者世界模型同步 - -在单机/小团队场景下,两者均可用 `rayon` + `tokio` + `SQLite WAL` 替代。真正的研究核心在于**时序图神经网络、因果发现、世界模型压缩**,而非分布式框架本身。 - -### 两条验证路径 - -| 路径 | 形式 | 产出 | 与主仓库关系 | -|:---|:---|:---|:---| -| **学术原型** | 独立仓库 `devbase-worldmodel-research` | ICSE/FSE/NeurIPS Workshop 论文 | 复用 devbase AST 逻辑做数据预处理,模型通过 MCP 接入 | -| **求职映射** | 简历话语 | "基于 Spark/Flink 构建代码知识图谱的动态演化分析系统" | 实际支撑:devbase 符号提取 + 独立研究仓库分布式训练 | - -### 待验证假设 - -- [ ] 时序图神经网络能否预测模块缺陷爆发时间窗口? -- [ ] 因果发现算法能否从 git history 提取可靠的干预建议? -- [ ] 世界模型压缩后,本地推理延迟能否控制在 < 100ms? 
- -### 关联笔记(双向联动) - -| 笔记 | 类型 | 用途 | -|:---|:---|:---| -| `vault/research/world-model-spark-flink-strategy.md` | 完整推导 | 世界模型三层架构、Spark/Flink 定位、研究方向建议 | -| `vault/ideas/world-model-cognition-card.md` | 精炼认知 | 快速查阅:一句话认知、决策原则、反常识洞察 | - -> **认知同步原则**:AGENTS.md 是项目级**约束文档**,Vault 笔记是**探索空间**。若 Vault 中的假设被验证,应反向同步到 AGENTS.md 的"待验证假设"中并打勾;若 AGENTS.md 的决策原则变更,应同步更新 Vault 认知卡片。 - -## 禁止事项 - -- 不得修改 `dev\third_party\*` 外部仓库 -- 不得在没有迁移逻辑的情况下修改 registry schema -- 不得引入已 deprecated 的协议 -- **不得在主仓库引入 Spark/Flink 依赖**(研究性质代码必须置于独立仓库,保持主仓库轻量) -- **不得在任何源码文件中硬编码真实 token、api_key 或密码**(包括注释和测试数据) diff --git a/docs/README.md b/docs/README.md index a378b0a..81a56d7 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,23 +1,24 @@ # devbase 文档导航 -> **项目状态**:v0.14.3 — 性能优化 + 架构治理 + RF-6 清零 -> **主入口**:[`AGENTS.md`](../AGENTS.md)(Agent 环境指引)· [`ROADMAP.md`](ROADMAP.md)(功能路线图) -> **最后整理**:2026-05-08 +> **项目状态**:v0.20.1 · Schema v36 · 71 MCP tools · 605 tests · 12 workspace crates +> **权威入口**:[`AGENTS.md`](../AGENTS.md)(Agent 环境指引)· [`CHANGELOG.md`](../CHANGELOG.md)(版本变更) +> **最后整理**:2026-06-13 --- ## 实时状态看板 -| 指标 | 数值 | -|------|------| -| 版本 | v0.14.3 | -| 测试 | 408 passed / 0 failed / 3 ignored | -| Clippy | 0 warnings | -| Schema | v23 | -| MCP Tools | 38 个(Stable 5 / Beta 29 / Experimental 4) | -| 代码行数 | ~32 KLOC | -| RF-6 | ✅ 生产代码 unwrap/expect 清零 | -| 已提取 Crates | 18 个 workspace crate | +| 指标 | 数值 | 来源 | +|------|------|------| +| 版本 | v0.20.1 | `Cargo.toml` | +| Rust Edition | 2024 | `Cargo.toml` | +| 测试 | 605 passed / 0 failed / 7 ignored | `cargo test --workspace -- --list` | +| Clippy | `-D warnings` 全绿 | CI | +| Schema | v36 | `src/registry/migrate.rs` | +| MCP Tools | **71**(5 Stable / 62 Beta / 4 Experimental) | `src/mcp/mod.rs` | +| Workspace Crates | 12 | `crates/` | +| `main.rs` | 833 行(RF-4 ≤ 1000) | `wc -l` | +| RF-6 | ✅ 生产代码 unwrap/expect/panic 清零 | invariant checks | --- @@ -29,11 +30,14 @@ | 用户 | 完整 CLI 命令参考 | [`guides/cli-reference.md`](guides/cli-reference.md) | | 
用户 | 如何接入 MCP(Kimi / Claude / Cursor) | [`guides/mcp-integration.md`](guides/mcp-integration.md) | | 用户 | Vault 笔记格式 + PARA 工作流 | [`guides/vault-format.md`](guides/vault-format.md) · [`guides/vault-workflow.md`](guides/vault-workflow.md) | -| 开发者 | 数据库 Schema 完整定义 | [`reference/schema-v23.md`](reference/schema-v23.md) | +| 用户 | Embedding Provider 配置 | [`guides/embedding-provider-setup.md`](guides/embedding-provider-setup.md) | +| 开发者 | 数据库 Schema 完整定义 | [`reference/schema-v23.md`](reference/schema-v23.md) ⚠️ 待更新至 v36 | | 开发者 | 统一实体模型(entities/relations) | [`reference/entities-model.md`](reference/entities-model.md) | -| 开发者 | 38 个 MCP 工具速查 | [`reference/mcp-tools.md`](reference/mcp-tools.md) | +| 开发者 | 71 个 MCP 工具速查 | [`reference/mcp-tools.md`](reference/mcp-tools.md) | | Agent | 项目架构定义 | [`architecture/context-compiler.md`](architecture/context-compiler.md) | -| 所有人 | 功能路线图 | [`ROADMAP.md`](ROADMAP.md) | +| Agent | 架构红线与不变量 | [`architecture/invariants.md`](architecture/invariants.md) | +| 维护者 | 功能路线图 | [`ROADMAP.md`](ROADMAP.md) | +| 维护者 | 已知问题与技术债务 | [`../KNOWN_ISSUES.md`](../KNOWN_ISSUES.md) | --- @@ -45,12 +49,17 @@ | 文档 | 说明 | |------|------| -| [`architecture/context-compiler.md`](architecture/context-compiler.md) | **v0.13.0 核心定义**:本地情境编译器 — 五层架构、六维信息模型、与 AI Agent 的契约 | -| [`architecture/workflow-dsl.md`](architecture/workflow-dsl.md) | Workflow DSL v0.4.0 规范(YAML 多步骤编排) | -| [`architecture/dependency-topology.md`](architecture/dependency-topology.md) | 模块依赖拓扑(Tier 1–11 自底向上进化顺序) | -| [`architecture/pre-split-evaluation.md`](architecture/pre-split-evaluation.md) | 单 crate vs 多 crate 评估结论 | -| [`architecture/adr-template.md`](architecture/adr-template.md) | ADR 模板与已完成决策索引(batch encoding、split defer) | -| [`architecture/invariants.md`](architecture/invariants.md) | 架构不变量清单:全局 + 分层约束 + 提取演习检查表 | +| [`architecture/overview.md`](architecture/overview.md) | 三层架构:字节 → 语义 → 行动 | +| [`architecture/context-compiler.md`](architecture/context-compiler.md) | 
**核心定义**:本地情境编译器 — 五层架构、六维信息模型 | +| [`architecture/workflow-dsl.md`](architecture/workflow-dsl.md) | Workflow DSL 规范(YAML 多步骤编排) | +| [`architecture/dependency-topology.md`](architecture/dependency-topology.md) | 模块依赖拓扑(Tier 1–11) | +| [`architecture/invariants.md`](architecture/invariants.md) | **架构红线** RF-1~RF-7 + 分层约束 G/T | +| [`architecture/split-plan.md`](architecture/split-plan.md) | Workspace crate 拆分计划 | +| [`architecture/pre-split-evaluation.md`](architecture/pre-split-evaluation.md) | 单 crate vs 多 crate 评估 | +| [`architecture/adr-template.md`](architecture/adr-template.md) | ADR 模板与已完成决策索引 | +| [`architecture/adr-003-tantivy-sqlite-consistency.md`](architecture/adr-003-tantivy-sqlite-consistency.md) | ADR-003:Tantivy/SQLite 一致性 | +| [`architecture/adr-004-mcp-trait-decoupling.md`](architecture/adr-004-mcp-trait-decoupling.md) | ADR-004:MCP trait 解耦 | +| [`architecture/adr-005-appcontext-clone.md`](architecture/adr-005-appcontext-clone.md) | ADR-005:AppContext Clone 边界 | ### 📖 使用指南(Guides) @@ -58,11 +67,13 @@ | 文档 | 说明 | |------|------| -| [`guides/quickstart.md`](guides/quickstart.md) | 5 分钟上手指南:安装 → 扫描 → 索引 → MCP 配置 | -| [`guides/cli-reference.md`](guides/cli-reference.md) | 完整 CLI 子命令参考(scan/health/sync/index/vault/...) 
| -| [`guides/mcp-integration.md`](guides/mcp-integration.md) | MCP 集成指南:Kimi CLI / Claude Code / Cursor 配置 | -| [`guides/vault-format.md`](guides/vault-format.md) | Vault 笔记格式规范(YAML frontmatter + Markdown) | -| [`guides/vault-workflow.md`](guides/vault-workflow.md) | PARA 目录结构实践(Inbox → Projects → Areas → Resources → Archives) | +| [`guides/quickstart.md`](guides/quickstart.md) | 5 分钟上手指南 | +| [`guides/cli-reference.md`](guides/cli-reference.md) | 完整 CLI 子命令参考 | +| [`guides/mcp-integration.md`](guides/mcp-integration.md) | MCP 集成指南(Kimi / Claude / Cursor) | +| [`guides/vault-format.md`](guides/vault-format.md) | Vault 笔记格式规范 | +| [`guides/vault-workflow.md`](guides/vault-workflow.md) | PARA 目录结构实践 | +| [`guides/embedding-provider-setup.md`](guides/embedding-provider-setup.md) | Embedding Provider 配置 | +| [`guides/ai-instance-handoff.md`](guides/ai-instance-handoff.md) | AI 实例交接指南 | ### 📚 技术参考(Reference) @@ -70,50 +81,76 @@ | 文档 | 说明 | |------|------| -| [`reference/mcp-tools.md`](reference/mcp-tools.md) | 38 个 MCP 工具完整清单(名称、tier、描述、参数、destructive gate) | -| [`reference/schema-v23.md`](reference/schema-v23.md) | 数据库 Schema v23:全部表结构、列定义、索引、迁移历史 | -| [`reference/entities-model.md`](reference/entities-model.md) | 统一实体模型详解:查询模式、双轨制过渡、自定义扩展 | +| [`reference/mcp-tools.md`](reference/mcp-tools.md) | 71 个 MCP 工具完整清单 | +| [`reference/schema-v23.md`](reference/schema-v23.md) | 数据库 Schema v23(⚠️ 待更新至 v36) | +| [`reference/entities-model.md`](reference/entities-model.md) | 统一实体模型详解 | +| [`reference/stable-tools/README.md`](reference/stable-tools/README.md) | 5 个 Stable 工具独立文档 | ### 🗺️ 路线与规划(Roadmaps & Plans) | 文档 | 说明 | |------|------| -| [`ROADMAP.md`](ROADMAP.md) | 唯一活跃主路线图。Phase 1–9 全记录 | -| [`plans/docs-reorganization-plan-v0.13.0.md`](plans/docs-reorganization-plan-v0.13.0.md) | **本文档重构计划**(2026-04-30) | +| [`ROADMAP.md`](ROADMAP.md) | 唯一活跃主路线图 | +| [`plans/v0.21.0-architecture-hardening.md`](plans/v0.21.0-architecture-hardening.md) | v0.21.0 架构硬化计划 | +| 
[`plans/greptimedb-integration.md`](plans/greptimedb-integration.md) | GreptimeDB 可选集成计划 | +| [`ops/roadmap-v0.14-v0.16.md`](ops/roadmap-v0.14-v0.16.md) | 历史路线图归档 | ### 🔬 研究分析(Research) -保留有长期价值的深度研究,精简自此前的 10 份文档。 +保留有长期价值的深度研究。 | 文档 | 说明 | |------|------| -| [`research/ai-tool-context.md`](research/ai-tool-context.md) | AI 开发工具上下文管理机制深度研究 | -| [`research/competitive-analysis.md`](research/competitive-analysis.md) | 竞争格局分析(合并版) | -| [`research/memory-infrastructure.md`](research/memory-infrastructure.md) | 记忆基础设施设计:从 Git repo 到 Knowledge Workspace | +| [`theory/AI_TOOL_CONTEXT_RESEARCH.md`](theory/AI_TOOL_CONTEXT_RESEARCH.md) | AI 开发工具上下文管理机制 | +| [`research/competitive-analysis.md`](research/competitive-analysis.md) | 竞争格局分析 | +| [`research/memory-infrastructure.md`](research/memory-infrastructure.md) | 记忆基础设施设计 | | [`research/ai-infrastructure-analysis.md`](research/ai-infrastructure-analysis.md) | AI 赛道基础设施分析 | | [`research/competitive-roadmap-table-a.md`](research/competitive-roadmap-table-a.md) | 五战蚕食战略路线 | +### 📐 RFC(Request for Comments) + +| 文档 | 说明 | +|------|------| +| [`RFC/agent-memory-vector-storage.md`](RFC/agent-memory-vector-storage.md) | Agent Memory 向量存储 RFC | +| [`RFC/claudecode-workflow-integration.md`](RFC/claudecode-workflow-integration.md) | ClaudeCode 工作流集成 RFC | + +### 🖥️ 客户端适配(Clients) + +客户端无关原则下的适配示例。 + +| 文档 | 说明 | +|------|------| +| [`clients/claude/scenarios.md`](clients/claude/scenarios.md) | Claude Code 使用场景 | + ### 🗄️ 归档(_archive/) -> 历史文档,保留只读价值,不再维护。 +> 历史文档,保留只读价值,不再维护。新增归档文件需在本文档注册。 | 文档 | 归档理由 | |------|----------| -| [`_archive/mcp-contract-v0.1.md`](_archive/mcp-contract-v0.1.md) | v0.1 草案仅 4 个 tool,已实现 38 个 | -| [`_archive/roadmap-2026.md`](_archive/roadmap-2026.md) | 自标"严重过时"(v0.2.3) | +| [`_archive/mcp-contract-v0.1.md`](_archive/mcp-contract-v0.1.md) | v0.1 草案,已实现 71 个 tool | +| [`_archive/roadmap-2026.md`](_archive/roadmap-2026.md) | 严重过时(v0.2.3) | | [`_archive/skill-runtime.md`](_archive/skill-runtime.md) | 已完全实现 | | 
[`_archive/tui-skill-integration.md`](_archive/tui-skill-integration.md) | 已完全实现 | -| [`_archive/sprint_2_plan.md`](_archive/sprint_2_plan.md) | Sprint 2 全部完成 | -| [`_archive/smoke_test_report_20260418.md`](_archive/smoke_test_report_20260418.md) | 旧版本测试报告 | | `_archive/*` | 其余见目录内文件 | ### 📊 运维与进度(Ops & Progress) | 文档 | 说明 | |------|------| -| [`ops/code-review-and-ops-plan.md`](ops/code-review-and-ops-plan.md) | v0.10.0 代码审计与运维计划 | +| [`ops/code-review-and-ops-plan.md`](ops/code-review-and-ops-plan.md) | 代码审计与运维计划 | | [`progress/progress-20260430.md`](progress/progress-20260430.md) | v0.13.0 日进度记录 | +### 📋 其他门面文件 + +| 文件 | 说明 | +|------|------| +| [`../AGENTS.md`](../AGENTS.md) | Agent 环境指引(权威) | +| [`../CONTRIBUTING.md`](../CONTRIBUTING.md) | 贡献指南 | +| [`../SECURITY.md`](../SECURITY.md) | 安全策略 | +| [`../CHANGELOG.md`](../CHANGELOG.md) | 版本变更日志 | +| [`../KNOWN_ISSUES.md`](../KNOWN_ISSUES.md) | 已知问题与技术债务 | + --- ## 文档维护原则 @@ -122,3 +159,8 @@ 2. **`_archive/` 文档**禁止修改内容,仅可添加顶部归档声明。 3. **新增文档**必须在本文档注册,否则视为孤立文档。 4. **每个 Markdown 文档顶部**应包含 `> **状态**:...` 标注。 +5. 
**关键数字指标**(版本、Schema、Tools、Tests)必须从代码/CI 实测,禁止复制粘贴旧值。 + +--- + +*本文件是文档目录的唯一入口。修改文档结构时请同步更新本文件。* diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index dba38aa..a3af8c7 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -1,10 +1,10 @@ # devbase Roadmap -> **当前阶段**:阶段十一 — v0.20.0 知识完备性(已发布) +> **当前阶段**:阶段十一 — v0.20.1 知识完备性(已发布) > -> **最后更新**:2026-05-14 +> **最后更新**:2026-06-13 > -> **版本状态**:`0.20.0`(Schema 34,68 MCP tools,451 tests) +> **版本状态**:`0.20.1`(Schema 36,71 MCP tools,605 tests) --- diff --git a/docs/guides/README.md b/docs/guides/README.md index c0ab98b..3cd442e 100644 --- a/docs/guides/README.md +++ b/docs/guides/README.md @@ -9,3 +9,5 @@ | [`mcp-integration.md`](mcp-integration.md) | AI 用户 | Kimi / Claude / Cursor MCP 配置 | | [`vault-format.md`](vault-format.md) | Vault 用户 | 笔记格式规范 | | [`vault-workflow.md`](vault-workflow.md) | Vault 用户 | PARA 目录结构实践 | +| [`embedding-provider-setup.md`](embedding-provider-setup.md) | 高级用户 | Embedding Provider 配置 | +| [`ai-instance-handoff.md`](ai-instance-handoff.md) | Agent 用户 | AI 实例交接指南 | diff --git a/docs/guides/mcp-integration.md b/docs/guides/mcp-integration.md index fcf44b0..7fb2e11 100644 --- a/docs/guides/mcp-integration.md +++ b/docs/guides/mcp-integration.md @@ -6,7 +6,7 @@ ## 什么是 MCP -MCP(Model Context Protocol)是 AI 助手与外部工具通信的标准协议。devbase 作为 **MCP Server**,向 AI 暴露 69 个结构化工具,让 AI 能够: +MCP(Model Context Protocol)是 AI 助手与外部工具通信的标准协议。devbase 作为 **MCP Server**,向 AI 暴露 71 个结构化工具,让 AI 能够: - 查询本地有哪些项目、它们的状态如何 - 批量同步仓库、检查健康度 @@ -104,18 +104,22 @@ cp configs/kimi-mcp.json ~/.kimi/mcp.json ## 工具列表概览 -devbase 提供 38 个工具,按域分类: +devbase 提供 71 个工具,按域分类: | 域 | 工具数 | 代表能力 | |:---|:---|:---| | 仓库管理 | 5 | scan, health, sync, query_repos, index | | 代码分析 | 6 | code_metrics, module_graph, code_symbols, call_graph, dependency_graph, dead_code | -| 知识检索 | 8 | semantic_search, hybrid_search, cross_repo_search, related_symbols, knowledge_report ... 
| -| Vault | 4 | vault_search, vault_read, vault_write, vault_backlinks | -| Skill | 4 | skill_list, skill_search, skill_run, skill_discover | -| 项目上下文 | 1 | project_context | -| 运维 | 3 | oplog_query, known_limit_store, known_limit_list | -| 其他 | 7 | query, note, digest, paper_index, github_info, arxiv_fetch, experiment_log | +| 知识检索 | 9 | semantic_search, hybrid_search, cross_repo_search, search_quality, embedding_store/search ... | +| Vault | 8 | vault_search/read/write, backlinks, daily, graph, export, history | +| Skill | 5 | skill_list/search/run/discover/sync | +| 项目上下文 | 3 | project_context, project_brief, impact_analysis | +| Session | 13 | session save/list/resume/attach/detach/activate/search/capture/workflows/recall/index/export/import | +| Index | 3 | index_health, index_stream | +| Workflow | 3 | workflow_list/run/status | +| Relation | 3 | relation_store/query/delete | +| Known Limit | 2 | known_limit_store/list | +| 运维与其他 | 11 | query, note, status, digest, paper_index, github_info, arxiv_fetch, oplog_query, evaluate, document_convert, ontology_import | 完整清单参见 [`reference/mcp-tools.md`](../reference/mcp-tools.md)。 diff --git a/docs/reference/README.md b/docs/reference/README.md index bad5da2..3e656c1 100644 --- a/docs/reference/README.md +++ b/docs/reference/README.md @@ -2,8 +2,9 @@ 技术参考文档,面向 AI Agent 和开发者。 -| 文档 | 内容 | -|------|------| -| [`mcp-tools.md`](mcp-tools.md) | 38 个 MCP 工具完整清单(名称、tier、描述、参数速查) | -| [`schema-v23.md`](schema-v23.md) | 数据库 Schema v23(表、列、索引、迁移历史) | -| [`entities-model.md`](entities-model.md) | 统一实体模型详解(entities / relations / entity_types) | +| 文档 | 内容 | 状态 | +|------|------|------| +| [`mcp-tools.md`](mcp-tools.md) | 71 个 MCP 工具完整清单(名称、tier、描述、参数速查) | 已同步 v0.20.1 | +| [`schema-v23.md`](schema-v23.md) | 数据库 Schema v23(表、列、索引、迁移历史) | ⚠️ 待更新至 v36 | +| [`entities-model.md`](entities-model.md) | 统一实体模型详解(entities / relations / entity_types) | 已同步 | +| [`stable-tools/README.md`](stable-tools/README.md) | 5 个 Stable 工具独立文档 | 已同步 | 
diff --git a/docs/reference/mcp-tools.md b/docs/reference/mcp-tools.md index 1210986..8ca235a 100644 --- a/docs/reference/mcp-tools.md +++ b/docs/reference/mcp-tools.md @@ -1,10 +1,12 @@ # MCP Tools 参考 -devbase MCP Server 提供 **69 个 tools**,通过 stdio 传输与 AI Agent 通信。工具按稳定性分为三级: +> 状态:已同步至 v0.20.1 · `src/mcp/mod.rs` -- **Stable** — 经过充分测试,schema 冻结。详见 [`stable-tools/`](stable-tools/README.md) 独立文档。 -- **Beta** — 功能验证通过,schema 可能微调 -- **Experimental** — 新功能,行为可能变化 +devbase MCP Server 提供 **71 个 tools**,通过 stdio 传输与 AI Agent 通信。工具按稳定性分为三级: + +- **Stable(5)** — 经过充分测试,schema 冻结。详见 [`stable-tools/`](stable-tools/README.md) 独立文档。 +- **Beta(58)** — 功能验证通过,schema 可能微调。 +- **Experimental(8)** — 新功能,行为可能变化。 --- @@ -15,7 +17,7 @@ devbase MCP Server 提供 **69 个 tools**,通过 stdio 传输与 AI Agent 通 | `devkit_scan` | Beta | 扫描目录发现 Git 仓库并注册 | `path`, `register` | | [`devkit_health`](stable-tools/health.md) | Stable | 检查注册仓库的健康状态(dirty/behind/ahead) | `detail`, `limit`, `page` | | `devkit_sync` | Beta | 安全同步仓库与上游(destructive gate) | `repo_id`, `dry_run` | -| `devkit_query_repos` | Stable | 查询已注册仓库列表,支持 tag/language 过滤 | `query`, `limit`, `page` | +| [`devkit_query_repos`](stable-tools/query_repos.md) | Stable | 查询已注册仓库列表,支持 tag/language 过滤 | `query`, `limit`, `page` | | `devkit_index` | Beta | 索引仓库摘要、模块结构、代码符号 | `path` | ## 代码分析(6) @@ -29,7 +31,7 @@ devbase MCP Server 提供 **69 个 tools**,通过 stdio 传输与 AI Agent 通 | `devkit_call_graph` | Beta | 获取函数调用图 | `repo_id`, `symbol_name` | | `devkit_dead_code` | Beta | 检测未被调用的私有函数 | `repo_id`, `include_pub` | -## 知识检索(8) +## 知识检索(9) | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| @@ -41,13 +43,14 @@ devbase MCP Server 提供 **69 个 tools**,通过 stdio 传输与 AI Agent 通 | `devkit_embedding_search` | Beta | 基于 embedding 的相似度搜索 | `repo_id`, `embedding`, `limit` | | `devkit_natural_language_query` | Beta | 自然语言查询(NLQ) | `query`, `limit` | | `devkit_knowledge_report` | Beta | 生成工作区知识覆盖报告 | `repo_id`, `activity_limit` | +| `devkit_search_quality` 
| Beta | 评估混合检索质量指标 | `repo_id`, `query` | ## Vault 笔记(8) | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| | [`devkit_vault_search`](stable-tools/vault_search.md) | Stable | 关键词搜索 Vault 笔记 | `query` | -| `devkit_vault_read` | Stable | 读取指定 Vault 笔记的完整内容 | `path` | +| [`devkit_vault_read`](stable-tools/vault_read.md) | Stable | 读取指定 Vault 笔记的完整内容 | `path` | | `devkit_vault_write` | Beta | 写入或更新 Vault 笔记(destructive gate) | `path`, `content`, `frontmatter` | | `devkit_vault_backlinks` | Beta | 查找指向指定笔记的反向链接 | `note_id` | | `devkit_vault_daily` | Beta | 按日期列出 Vault 每日笔记 | `date`, `limit` | @@ -55,7 +58,7 @@ devbase MCP Server 提供 **69 个 tools**,通过 stdio 传输与 AI Agent 通 | `devkit_vault_export` | Beta | 导出 Vault 笔记集合 | `query`, `format` | | `devkit_vault_history` | Beta | 获取 Vault 笔记修改历史 | `path`, `limit` | -## Skill 运行时(4) +## Skill 运行时(5) | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| @@ -63,12 +66,13 @@ devbase MCP Server 提供 **69 个 tools**,通过 stdio 传输与 AI Agent 通 | `devkit_skill_search` | Beta | 语义搜索 Skill | `query`, `limit` | | `devkit_skill_run` | Beta | 执行指定 Skill(destructive gate) | `skill_id`, `args` | | `devkit_skill_discover` | Beta | 将当前项目封装为 Skill(destructive gate,dry_run 默认 true) | `path` | +| `devkit_skill_sync` | Beta | 同步外部 Skill 源 | `source`, `dry_run` | ## 项目上下文(3) | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| -| `devkit_project_context` | Stable | 获取项目统一上下文(repo + vault + assets + modules + symbols + calls) | `project` | +| [`devkit_project_context`](stable-tools/project_context.md) | Stable | 获取项目统一上下文(repo + vault + assets + modules + symbols + calls) | `project` | | `devkit_project_brief` | Beta | 生成 Markdown 项目摘要(架构 + 活动 + 限制),供 LLM 注入 | `repo_id`, `max_tokens` | | `devkit_impact_analysis` | Beta | 分析代码变更影响范围 | `repo_id`, `file_path` | @@ -120,7 +124,7 @@ devbase MCP Server 提供 **69 个 tools**,通过 stdio 传输与 AI Agent 通 | `devkit_known_limit_store` | Beta | 记录已知限制(Hard Veto / Known 
Bug) | `id`, `category`, `description` | | `devkit_known_limit_list` | Beta | 列出已知限制 | `category`, `mitigated` | -## 其他(6) +## 运维与其他(12) | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| @@ -129,13 +133,13 @@ devbase MCP Server 提供 **69 个 tools**,通过 stdio 传输与 AI Agent 通 | `devkit_status` | Beta | 检查 devbase 服务状态 | — | | `devkit_digest` | Experimental | 生成每日知识摘要 | — | | `devkit_paper_index` | Experimental | 索引学术论文 | `title`, `authors`, `venue` | -| `devkit_search_quality` | Beta | 评估搜索质量指标 | `repo_id`, `query` | -| `devkit_experiment_log` | Beta | 记录实验结果 | `repo_id`, `paper_id`, `status` | | `devkit_github_info` | Beta | 查询 GitHub 仓库信息 | `owner`, `repo` | | `devkit_arxiv_fetch` | Beta | 从 arXiv 获取论文元数据 | `query`, `max_results` | | `devkit_oplog_query` | Beta | 查询操作日志 | `limit`, `repo_id` | | `devkit_evaluate` | Beta | 评估工具调用结果 | `tool_name`, `result` | +| `devkit_experiment_log` | Beta | 记录实验结果 | `repo_id`, `paper_id`, `status` | | `devkit_document_convert` | Experimental | PDF/PPTX → Markdown 转换 | `source_path`, `output_path` | +| `devkit_ontology_import` | Beta | 本体批量导入 | `path`, `dry_run` | --- @@ -151,6 +155,7 @@ devbase MCP Server 提供 **69 个 tools**,通过 stdio 传输与 AI Agent 通 - `devkit_relation_delete` - `devkit_known_limit_store` - `devkit_workflow_run` +- `devkit_ontology_import` --- @@ -163,3 +168,7 @@ devbase MCP Server 提供 **69 个 tools**,通过 stdio 传输与 AI Agent 通 ``` 默认值:`stable,beta,experimental`(暴露全部)。 + +--- + +*本文件必须与 `src/mcp/mod.rs` 的 `McpToolEnum` / `tier()` 保持同步。* diff --git a/docs/reference/stable-tools/README.md b/docs/reference/stable-tools/README.md index 3b0b495..650e0d6 100644 --- a/docs/reference/stable-tools/README.md +++ b/docs/reference/stable-tools/README.md @@ -6,10 +6,10 @@ Breaking changes require a major version bump and a deprecation cycle. 
| Tool | Purpose | File | Test Coverage | |------|---------|------|---------------| | [`devkit_health`](health.md) | Check Git health (dirty/ahead/behind) of all registered repos | `repo.rs` | `test_tools_call_devkit_health` | -| `devkit_query_repos` | Query registered repos with language/tag/status filters | `repo.rs` | `test_tools_call_devkit_query_repos` | +| [`devkit_query_repos`](query_repos.md) | Query registered repos with language/tag/status filters | `repo.rs` | `test_tools_call_devkit_query_repos` | | [`devkit_vault_search`](vault_search.md) | Keyword search across Vault notes (titles, tags, content) | `vault.rs` | `test_tools_call_devkit_vault_search` | -| `devkit_vault_read` | Read full content of a Vault note including frontmatter | `vault.rs` | `test_tools_call_devkit_vault_read` | -| `devkit_project_context` | Unified project snapshot (repo + vault + symbols + relations + limits + skills) | `context.rs` | `test_tools_call_devkit_project_context` | +| [`devkit_vault_read`](vault_read.md) | Read full content of a Vault note including frontmatter | `vault.rs` | `test_tools_call_devkit_vault_read` | +| [`devkit_project_context`](project_context.md) | Unified project snapshot (repo + vault + symbols + relations + limits + skills) | `context.rs` | `test_tools_call_devkit_project_context` | ## Schema stability guarantee diff --git a/docs/reference/stable-tools/project_context.md b/docs/reference/stable-tools/project_context.md new file mode 100644 index 0000000..3d1f354 --- /dev/null +++ b/docs/reference/stable-tools/project_context.md @@ -0,0 +1,109 @@ +# devkit_project_context + +> **Tier**: Stable (frozen at v0.20.1) +> **Source**: `src/mcp/tools/context.rs` — `DevkitProjectContextTool` + +Retrieve a unified context snapshot for a project by aggregating repository metadata, linked Vault notes, code symbols, call graph edges, asset files, relations, workflows, known limits, and available skills. 
+ +## Purpose + +- Understand a project holistically in a single tool call +- Prepare context before answering questions about a codebase +- Build project briefs or summaries without multiple round trips +- Discover documentation, assets, and related entities for a project + +## When NOT to use + +- Searching across all repos → use `devkit_query_repos` +- Vault full-text search without a project → use `devkit_vault_search` +- Checking health of multiple repos → use `devkit_health` +- Only one specific fact is needed → use the specific tool to save context space + +## Input Schema + +```json +{ + "type": "object", + "properties": { + "project": { + "type": "string", + "description": "Project identifier (repo id, repo name, or vault note id/path)" + }, + "goal": { + "type": "string", + "description": "Optional task description for relevance-ranking symbols and calls" + } + }, + "required": ["project"] +} +``` + +| Parameter | Type | Required | Default | Description | +|-----------|--------|----------|---------|------------------------------------------| +| `project` | string | Yes | — | Repo id, repo name, or vault note id/path | +| `goal` | string | No | — | Optional goal for relevance ranking | + +## Output Schema + +```json +{ + "success": true, + "project": "devbase", + "repo": { + "id": "devbase", + "path": "~/dev/devbase", + "language": "rust", + "tags": ["managed", "active"], + "stars": 42 + }, + "vault_notes": [ + { "id": "mcp-integration", "title": "MCP Integration", "source": "link" } + ], + "modules": [...], + "symbols": [...], + "calls": [...], + "activity": [...], + "related_symbols": [...], + "relations": [...], + "workflows": [...], + "assets": [...], + "recent_commits": [...], + "hot_files": [...], + "known_limits": [...], + "skills": [...] +} +``` + +### Top-level fields + +| Field | Type | Description | +|-------------------|----------|-----------------------------------------------------| +| `repo` | object? 
| Repository metadata or null | +| `vault_notes` | object[] | Linked and keyword-matched notes | +| `modules` | object[] | High-level module structure | +| `symbols` | object[] | Top code symbols (functions, structs, etc.) | +| `calls` | object[] | Call graph edges | +| `activity` | object[] | Recent OpLog events | +| `related_symbols` | object[] | Conceptual symbol-to-symbol links | +| `relations` | object[] | Knowledge-graph relations from `relations` table | +| `workflows` | object[] | Recent workflow executions | +| `assets` | object[] | Project asset files/folders | +| `recent_commits` | string[] | Recent commit messages | +| `hot_files` | string[] | Recently modified files | +| `known_limits` | object[] | Unmitigated known limits | +| `skills` | object[] | Available devbase skills | + +## Errors + +| Error | Cause | +|--------------------|------------------------------------------| +| `project required` | Missing `project` argument | +| No repo matched | Substring did not match any repo | + +## Changelog + +| Version | Change | +|---------|------------------------------------------| +| v0.14.2 | Promoted to Stable tier | +| v0.20.0 | Enriched with `known_limits` and `skills` | +| v0.20.1 | Invocation test `test_tools_call_devkit_project_context` added | diff --git a/docs/reference/stable-tools/query_repos.md b/docs/reference/stable-tools/query_repos.md new file mode 100644 index 0000000..b58a465 --- /dev/null +++ b/docs/reference/stable-tools/query_repos.md @@ -0,0 +1,96 @@ +# devkit_query_repos + +> **Tier**: Stable (frozen at v0.20.1) +> **Source**: `src/mcp/tools/repo.rs` — `DevkitQueryReposTool` + +Query registered repositories using structured filters. This is the primary read-only discovery tool for the local workspace. 
+ +## Purpose + +- List all tracked repositories +- Filter by programming language (e.g., "rust", "python", "go") +- Filter by tag (e.g., "managed", "third-party", "active") +- Filter by Git status (dirty, ahead, behind, diverged, up_to_date) +- Get paginated repo listings with metadata + +## When NOT to use + +- Natural language queries → use `devkit_natural_language_query` +- Full-text search across repo contents → use `devkit_index` + search tools +- Detailed health diagnostics → use `devkit_health` +- Writing or modifying repos → use `devkit_sync` or `devkit_scan` + +## Input Schema + +```json +{ + "type": "object", + "properties": { + "language": { "type": "string", "description": "Filter by programming language", "default": "" }, + "tag": { "type": "string", "description": "Filter by tag", "default": "" }, + "status": { "type": "string", "enum": ["dirty", "ahead", "behind", "diverged", "up_to_date", ""], "description": "Filter by Git status", "default": "" }, + "limit": { "type": "integer", "description": "Max results", "default": 50 } + } +} +``` + +| Parameter | Type | Required | Default | Description | +|-----------|---------|----------|---------|--------------------------------------| +| `language`| string | No | `""` | Programming language filter | +| `tag` | string | No | `""` | Tag filter (case-insensitive) | +| `status` | string | No | `""` | Git status enum or empty for all | +| `limit` | integer | No | `50` | Maximum number of results | + +### Status values + +| Status | Meaning | +|--------------|-------------------------------------------| +| `dirty` | Uncommitted changes in working tree | +| `ahead` | Local commits not pushed, no remote ahead | +| `behind` | Remote commits not pulled, no local ahead | +| `diverged` | Both ahead and behind | +| `up_to_date` | Clean and synchronized | + +## Output Schema + +```json +{ + "success": true, + "count": 3, + "repos": [ + { + "id": "devbase", + "path": "~/dev/devbase", + "language": "rust", + "tags": 
["managed", "active"], + "status": { "dirty": false, "ahead": 0, "behind": 0 }, + "stars": 42 + } + ] +} +``` + +| Field | Type | Description | +|------------------|----------|------------------------------------------| +| `id` | string | Repository identifier | +| `path` | string | Local path (home masked as `~`) | +| `language` | string? | Primary programming language | +| `tags` | string[] | Associated tags | +| `status.dirty` | boolean | Whether working tree has changes | +| `status.ahead` | integer | Commits ahead of upstream | +| `status.behind` | integer | Commits behind upstream | +| `stars` | integer | GitHub stars cache | + +## Errors + +| Error | Cause | +|------------------------------|------------------------------------------| +| Database connection failed | SQLite locked or corrupted | +| Filter parse error | Invalid `status` enum value | + +## Changelog + +| Version | Change | +|---------|------------------------------------------| +| v0.14.2 | Promoted to Stable tier | +| v0.20.1 | Invocation test `test_tools_call_devkit_query_repos` added | diff --git a/docs/reference/stable-tools/vault_read.md b/docs/reference/stable-tools/vault_read.md new file mode 100644 index 0000000..bbfd535 --- /dev/null +++ b/docs/reference/stable-tools/vault_read.md @@ -0,0 +1,75 @@ +# devkit_vault_read + +> **Tier**: Stable (frozen at v0.20.1) +> **Source**: `src/mcp/tools/vault.rs` — `DevkitVaultReadTool` + +Read the complete Markdown content of a Vault note, including its YAML frontmatter and body. 
+ +## Purpose + +- Read a specific note after finding it via `devkit_vault_search` +- Retrieve project documentation, architecture decisions, or design notes +- Extract frontmatter metadata (tags, repo links, ai_context) +- Render note content for the user or for downstream processing + +## When NOT to use + +- Searching for notes → use `devkit_vault_search` +- Writing or updating notes → use `devkit_vault_write` +- Finding backlinks → use `devkit_vault_backlinks` +- Reading code files → use filesystem tools or `devkit_project_context` + +## Input Schema + +```json +{ + "type": "object", + "properties": { + "path": { "type": "string", "description": "File path or note id" } + }, + "required": ["path"] +} +``` + +| Parameter | Type | Required | Default | Description | +|-----------|--------|----------|---------|--------------------------------| +| `path` | string | Yes | — | Vault-relative path or note id | + +## Output Schema + +```json +{ + "success": true, + "path": "references/mcp-integration.md", + "frontmatter": { + "id": "mcp-integration", + "title": "MCP Integration Guide", + "tags": ["mcp", "integration"], + "repo": "devbase", + "created": "2026-04-20", + "updated": "2026-06-13" + }, + "content": "# MCP Integration Guide\n\n..." +} +``` + +| Field | Type | Description | +|---------------|---------|------------------------------------------| +| `path` | string | Requested path | +| `frontmatter` | object? 
| Parsed YAML frontmatter | +| `content` | string | Markdown body (may be empty) | + +## Errors + +| Error | Cause | +|--------------------|------------------------------------------| +| `path required` | Missing or empty `path` argument | +| Vault unreadable | Vault directory missing or permission denied | +| Note not found | No note matches the given path/id | + +## Changelog + +| Version | Change | +|---------|------------------------------------------| +| v0.14.2 | Promoted to Stable tier | +| v0.20.1 | Invocation test `test_tools_call_devkit_vault_read` added | diff --git a/server.json b/server.json index 75802e0..dfbd9ca 100644 --- a/server.json +++ b/server.json @@ -1,186 +1,200 @@ -{ - "$schema": "https://registry.modelcontextprotocol.io/schema/server.json", - "name": "io.github.juice094.devbase", - "version": "0.20.1", - "description": "Developer Knowledge OS — manage Git repos, vault notes (Markdown), and assets. AI-native workspace with 71 MCP tools.", - "license": "AGPL-3.0-or-later", - "homepage": "https://github.com/juice094/devbase", - "repository": { - "type": "git", - "url": "https://github.com/juice094/devbase.git" - }, - "maintainers": [ - { - "name": "juice094", - "email": "160722440+juice094@users.noreply.github.com" - } - ], - "categories": ["developer-tools", "knowledge-management", "version-control"], - "tags": ["git", "rust", "vault", "notes", "mcp"], - "runtime": { - "type": "stdio", - "command": "devbase", - "args": ["mcp"] - }, - "installation": { - "cargo": { - "crate": "devbase", - "bin": "devbase" - } - }, - "capabilities": { - "tools": { - "stable": [ - "devkit_health", - "devkit_query_repos", - "devkit_vault_search", - "devkit_vault_read", - "devkit_project_context" - ], - "beta": [ - "devkit_scan", - "devkit_sync", - "devkit_query", - "devkit_index", - "devkit_index_stream", - "devkit_status", - "devkit_note", - "devkit_digest", - "devkit_paper_index", - "devkit_experiment_log", - "devkit_github_info", - "devkit_arxiv_fetch", - 
"devkit_code_metrics", - "devkit_module_graph", - "devkit_code_symbols", - "devkit_dependency_graph", - "devkit_call_graph", - "devkit_dead_code", - "devkit_semantic_search", - "devkit_embedding_store", - "devkit_embedding_search", - "devkit_natural_language_query", - "devkit_vault_write", - "devkit_vault_backlinks", - "devkit_vault_daily", - "devkit_vault_graph", - "devkit_vault_export", - "devkit_vault_history", - "devkit_project_brief", - "devkit_impact_analysis", - "devkit_cross_repo_search", - "devkit_knowledge_report", - "devkit_related_symbols", - "devkit_hybrid_search", - "devkit_search_quality", - "devkit_skill_list", - "devkit_skill_search", - "devkit_skill_run", - "devkit_skill_discover", - "devkit_skill_sync", - "devkit_known_limit_store", - "devkit_known_limit_list", - "devkit_relation_store", - "devkit_relation_query", - "devkit_relation_delete", - "devkit_workflow_list", - "devkit_workflow_run", - "devkit_workflow_status", - "devkit_session_save", - "devkit_session_list", - "devkit_session_resume", - "devkit_session_attach", - "devkit_session_detach", - "devkit_session_activate", - "devkit_session_search", - "devkit_session_capture", - "devkit_session_workflows", - "devkit_session_recall", - "devkit_session_index", - "devkit_session_export", - "devkit_session_import", - "devkit_oplog_query", - "devkit_evaluate", - "devkit_document_convert", - "devkit_ontology_import", - "devkit_index_health" - ], - "experimental": [] - } - }, - "tools": { - "count": 71, - "list": [ - "devkit_scan", - "devkit_health", - "devkit_sync", - "devkit_query", - "devkit_query_repos", - "devkit_index", - "devkit_index_stream", - "devkit_status", - "devkit_note", - "devkit_digest", - "devkit_paper_index", - "devkit_experiment_log", - "devkit_github_info", - "devkit_arxiv_fetch", - "devkit_code_metrics", - "devkit_module_graph", - "devkit_code_symbols", - "devkit_dependency_graph", - "devkit_call_graph", - "devkit_dead_code", - "devkit_semantic_search", - 
"devkit_embedding_store", - "devkit_embedding_search", - "devkit_natural_language_query", - "devkit_vault_search", - "devkit_vault_read", - "devkit_vault_write", - "devkit_vault_backlinks", - "devkit_vault_daily", - "devkit_vault_graph", - "devkit_vault_export", - "devkit_vault_history", - "devkit_project_context", - "devkit_project_brief", - "devkit_impact_analysis", - "devkit_cross_repo_search", - "devkit_knowledge_report", - "devkit_related_symbols", - "devkit_hybrid_search", - "devkit_search_quality", - "devkit_skill_list", - "devkit_skill_search", - "devkit_skill_run", - "devkit_skill_discover", - "devkit_known_limit_store", - "devkit_known_limit_list", - "devkit_relation_store", - "devkit_relation_query", - "devkit_relation_delete", - "devkit_workflow_list", - "devkit_workflow_run", - "devkit_workflow_status", - "devkit_session_save", - "devkit_session_list", - "devkit_session_resume", - "devkit_session_attach", - "devkit_session_detach", - "devkit_session_activate", - "devkit_session_search", - "devkit_session_capture", - "devkit_session_workflows", - "devkit_session_recall", - "devkit_session_index", - "devkit_session_export", - "devkit_session_import", - "devkit_oplog_query", - "devkit_evaluate", - "devkit_document_convert", - "devkit_ontology_import", - "devkit_index_health" - ] - } -} +{ + "$schema": "https://registry.modelcontextprotocol.io/schema/server.json", + "name": "io.github.juice094.devbase", + "version": "0.20.1", + "description": "Developer Knowledge OS \u2014 manage Git repos, vault notes (Markdown), and assets. 
AI-native workspace with 71 MCP tools.", + "license": "AGPL-3.0-or-later", + "homepage": "https://github.com/juice094/devbase", + "repository": { + "type": "git", + "url": "https://github.com/juice094/devbase.git" + }, + "maintainers": [ + { + "name": "juice094", + "email": "160722440+juice094@users.noreply.github.com" + } + ], + "categories": [ + "developer-tools", + "knowledge-management", + "version-control" + ], + "tags": [ + "git", + "rust", + "vault", + "notes", + "mcp" + ], + "runtime": { + "type": "stdio", + "command": "devbase", + "args": [ + "mcp" + ] + }, + "installation": { + "cargo": { + "crate": "devbase", + "bin": "devbase" + } + }, + "capabilities": { + "tools": { + "stable": [ + "devkit_health", + "devkit_query_repos", + "devkit_vault_search", + "devkit_vault_read", + "devkit_project_context" + ], + "beta": [ + "devkit_scan", + "devkit_sync", + "devkit_query", + "devkit_index", + "devkit_index_stream", + "devkit_status", + "devkit_note", + "devkit_experiment_log", + "devkit_github_info", + "devkit_arxiv_fetch", + "devkit_code_metrics", + "devkit_module_graph", + "devkit_code_symbols", + "devkit_dependency_graph", + "devkit_call_graph", + "devkit_dead_code", + "devkit_semantic_search", + "devkit_embedding_store", + "devkit_embedding_search", + "devkit_natural_language_query", + "devkit_vault_write", + "devkit_vault_backlinks", + "devkit_vault_daily", + "devkit_vault_graph", + "devkit_vault_export", + "devkit_vault_history", + "devkit_project_brief", + "devkit_impact_analysis", + "devkit_cross_repo_search", + "devkit_knowledge_report", + "devkit_hybrid_search", + "devkit_search_quality", + "devkit_skill_list", + "devkit_skill_search", + "devkit_skill_run", + "devkit_skill_discover", + "devkit_skill_sync", + "devkit_known_limit_store", + "devkit_known_limit_list", + "devkit_relation_store", + "devkit_relation_query", + "devkit_relation_delete", + "devkit_workflow_list", + "devkit_workflow_run", + "devkit_workflow_status", + "devkit_session_save", + 
"devkit_session_list", + "devkit_session_resume", + "devkit_session_attach", + "devkit_session_detach", + "devkit_session_activate", + "devkit_session_search", + "devkit_session_capture", + "devkit_session_workflows", + "devkit_oplog_query", + "devkit_evaluate", + "devkit_ontology_import", + "devkit_index_health" + ], + "experimental": [ + "devkit_digest", + "devkit_document_convert", + "devkit_paper_index", + "devkit_related_symbols", + "devkit_session_export", + "devkit_session_import", + "devkit_session_index", + "devkit_session_recall" + ] + } + }, + "tools": { + "count": 71, + "list": [ + "devkit_arxiv_fetch", + "devkit_call_graph", + "devkit_code_metrics", + "devkit_code_symbols", + "devkit_cross_repo_search", + "devkit_dead_code", + "devkit_dependency_graph", + "devkit_digest", + "devkit_document_convert", + "devkit_embedding_search", + "devkit_embedding_store", + "devkit_evaluate", + "devkit_experiment_log", + "devkit_github_info", + "devkit_health", + "devkit_hybrid_search", + "devkit_impact_analysis", + "devkit_index", + "devkit_index_health", + "devkit_index_stream", + "devkit_knowledge_report", + "devkit_known_limit_list", + "devkit_known_limit_store", + "devkit_module_graph", + "devkit_natural_language_query", + "devkit_note", + "devkit_ontology_import", + "devkit_oplog_query", + "devkit_paper_index", + "devkit_project_brief", + "devkit_project_context", + "devkit_query", + "devkit_query_repos", + "devkit_related_symbols", + "devkit_relation_delete", + "devkit_relation_query", + "devkit_relation_store", + "devkit_scan", + "devkit_search_quality", + "devkit_semantic_search", + "devkit_session_activate", + "devkit_session_attach", + "devkit_session_capture", + "devkit_session_detach", + "devkit_session_export", + "devkit_session_import", + "devkit_session_index", + "devkit_session_list", + "devkit_session_recall", + "devkit_session_resume", + "devkit_session_save", + "devkit_session_search", + "devkit_session_workflows", + "devkit_skill_discover", + 
"devkit_skill_list", + "devkit_skill_run", + "devkit_skill_search", + "devkit_skill_sync", + "devkit_status", + "devkit_sync", + "devkit_vault_backlinks", + "devkit_vault_daily", + "devkit_vault_export", + "devkit_vault_graph", + "devkit_vault_history", + "devkit_vault_read", + "devkit_vault_search", + "devkit_vault_write", + "devkit_workflow_list", + "devkit_workflow_run", + "devkit_workflow_status" + ] + } +} diff --git a/smithery.yaml b/smithery.yaml index ef67a48..3df47dd 100644 --- a/smithery.yaml +++ b/smithery.yaml @@ -16,8 +16,9 @@ startCommand: description: | Developer Knowledge OS — manage Git repos, vault notes (Markdown), and assets. - Provides 19 MCP tools for repo scanning, health checks, vault search/backlinks, - and unified project context queries. + Provides 71 MCP tools (5 stable / 58 beta / 8 experimental) for repo scanning, + health checks, code analysis, vault search/backlinks, agent sessions, workflows, + knowledge graphs, and unified project context queries. categories: - developer-tools From 3137f9019df8c4e84b4af9ef346afcaaa379c7ae Mon Sep 17 00:00:00 2001 From: juice094 Date: Sat, 13 Jun 2026 22:33:46 +0800 Subject: [PATCH 10/11] style: cargo fmt on feature branch --- src/commands/mod.rs | 22 +- src/commands/ontology.rs | 10 +- src/commands/skill.rs | 29 ++- src/mcp/tools/ontology_import.rs | 5 +- src/mcp/tools/skill_sync.rs | 14 +- src/mcp/tools/vault.rs | 11 +- src/registry/import_ontology.rs | 32 +-- src/skill_runtime/sources.rs | 21 +- src/vault/scanner.rs | 379 ++++++++++++++++--------------- 9 files changed, 251 insertions(+), 272 deletions(-) diff --git a/src/commands/mod.rs b/src/commands/mod.rs index 666565a..d97fb96 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1,11 +1,11 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2026 juice094 -pub mod analysis; -pub mod knowledge; -pub mod ontology; -pub mod limit; -pub mod repo; -pub mod simple; -pub mod skill; -pub mod system; -pub mod workflow; +// 
SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 +pub mod analysis; +pub mod knowledge; +pub mod limit; +pub mod ontology; +pub mod repo; +pub mod simple; +pub mod skill; +pub mod system; +pub mod workflow; diff --git a/src/commands/ontology.rs b/src/commands/ontology.rs index 1762ded..f483a37 100644 --- a/src/commands/ontology.rs +++ b/src/commands/ontology.rs @@ -5,10 +5,7 @@ pub fn run_import( dry_run: bool, ) -> anyhow::Result<()> { let wp = if workspace.is_empty() { - dirs::home_dir() - .unwrap_or_default() - .join(".kimi_openclaw") - .join("workspace") + dirs::home_dir().unwrap_or_default().join(".kimi_openclaw").join("workspace") } else { std::path::PathBuf::from(workspace) }; @@ -38,7 +35,10 @@ pub fn run_import( println!("Ontology import from: {}", wp.display()); println!(" Entities: {} added, {} updated", stats.entities_added, stats.entities_updated); - println!(" Relations: {} added, {} updated", stats.relations_added, stats.relations_updated); + println!( + " Relations: {} added, {} updated", + stats.relations_added, stats.relations_updated + ); if !stats.errors.is_empty() { println!(" Errors: {}", stats.errors.len()); for e in &stats.errors { diff --git a/src/commands/skill.rs b/src/commands/skill.rs index ae83f94..2f73911 100644 --- a/src/commands/skill.rs +++ b/src/commands/skill.rs @@ -334,23 +334,20 @@ pub fn run_skill( json, } => { use crate::skill_runtime::sources::{GitHubSource, LocalFileSource, SkillSource}; - let source_impl: Box = - if source.starts_with("https://github.com/") - || source.starts_with("http://github.com/") - || (source.contains('/') - && !source.starts_with('/') - && !source.contains("://")) - { - let (owner, repo) = parse_github_url(&source)?; - let path = source_path.as_deref().unwrap_or("skills"); - Box::new(GitHubSource::new(&owner, &repo, path)) - } else { - let path = source.strip_prefix("file://").unwrap_or(&source); - let name = source_path.as_deref().unwrap_or(path); - Box::new(LocalFileSource::new(name, 
std::path::Path::new(path))) - }; + let source_impl: Box = if source.starts_with("https://github.com/") + || source.starts_with("http://github.com/") + || (source.contains('/') && !source.starts_with('/') && !source.contains("://")) + { + let (owner, repo) = parse_github_url(&source)?; + let path = source_path.as_deref().unwrap_or("skills"); + Box::new(GitHubSource::new(&owner, &repo, path)) + } else { + let path = source.strip_prefix("file://").unwrap_or(&source); + let name = source_path.as_deref().unwrap_or(path); + Box::new(LocalFileSource::new(name, std::path::Path::new(path))) + }; let skills = tokio::runtime::Runtime::new() - .unwrap() + .context("failed to create tokio runtime for skill fetch")? .block_on(source_impl.fetch())?; if dry_run { if json { diff --git a/src/mcp/tools/ontology_import.rs b/src/mcp/tools/ontology_import.rs index 23c674b..fb735f0 100644 --- a/src/mcp/tools/ontology_import.rs +++ b/src/mcp/tools/ontology_import.rs @@ -44,10 +44,7 @@ Requires DEVBASE_MCP_ENABLE_DESTRUCTIVE=1 since this modifies the registry."#, .and_then(|v| v.as_str()) .map(std::path::PathBuf::from) .unwrap_or_else(|| { - dirs::home_dir() - .unwrap_or_default() - .join(".kimi_openclaw") - .join("workspace") + dirs::home_dir().unwrap_or_default().join(".kimi_openclaw").join("workspace") }); let stats = { diff --git a/src/mcp/tools/skill_sync.rs b/src/mcp/tools/skill_sync.rs index 3f3d5c8..47db2da 100644 --- a/src/mcp/tools/skill_sync.rs +++ b/src/mcp/tools/skill_sync.rs @@ -1,8 +1,8 @@ use anyhow::Context; use crate::mcp::McpTool; -use crate::skill_runtime::sources::{GitHubSource, LocalFileSource, SkillSource}; use crate::skill_runtime::registry; +use crate::skill_runtime::sources::{GitHubSource, LocalFileSource, SkillSource}; #[derive(Clone)] pub struct DevkitSkillSyncTool; @@ -56,14 +56,13 @@ Requires DEVBASE_MCP_ENABLE_DESTRUCTIVE=1 since this modifies the skill registry .and_then(|v| v.as_str()) .context("Missing required argument: source")?; let source_path = 
args.get("source_path").and_then(|v| v.as_str()); - let dry_run = args - .get("dry_run") - .and_then(|v| v.as_bool()) - .unwrap_or(false); + let dry_run = args.get("dry_run").and_then(|v| v.as_bool()).unwrap_or(false); let source: Box = if source_url.starts_with("https://github.com/") || source_url.starts_with("http://github.com/") - || (source_url.contains('/') && !source_url.starts_with("/") && !source_url.contains("://")) + || (source_url.contains('/') + && !source_url.starts_with("/") + && !source_url.contains("://")) { let (owner, repo) = parse_github_url(source_url)?; let path = source_path.unwrap_or("skills"); @@ -142,7 +141,8 @@ fn parse_github_url(url: &str) -> anyhow::Result<(String, String)> { } // Bare owner/repo format if let Some((owner, repo)) = url.split_once('/') { - if !owner.is_empty() && !repo.is_empty() + if !owner.is_empty() + && !repo.is_empty() && !owner.contains("://") && !owner.contains('\\') && !owner.contains(' ') diff --git a/src/mcp/tools/vault.rs b/src/mcp/tools/vault.rs index b88b7c3..70b1cbd 100644 --- a/src/mcp/tools/vault.rs +++ b/src/mcp/tools/vault.rs @@ -265,11 +265,12 @@ fn resolve_vault_write_path( cfg.vault.roots.iter().map(std::path::PathBuf::from).collect() } _ => { - vec![ctx - .storage - .workspace_dir() - .map(|ws| ws.join("vault")) - .unwrap_or_else(|_| std::path::PathBuf::from("vault"))] + vec![ + ctx.storage + .workspace_dir() + .map(|ws| ws.join("vault")) + .unwrap_or_else(|_| std::path::PathBuf::from("vault")), + ] } }; diff --git a/src/registry/import_ontology.rs b/src/registry/import_ontology.rs index e42ddb3..38cb1f2 100644 --- a/src/registry/import_ontology.rs +++ b/src/registry/import_ontology.rs @@ -15,12 +15,10 @@ pub struct OntologyImportStats { } /// Import ontology entities and relations from an OpenClaw-compatible workspace. 
-pub fn import_ontology( - conn: &Connection, - workspace_path: &Path, -) -> Result { +pub fn import_ontology(conn: &Connection, workspace_path: &Path) -> Result { let entities_dir = workspace_path.join("ontology").join("entities"); - let relations_file = workspace_path.join("ontology").join("relations").join("core-relations.jsonl"); + let relations_file = + workspace_path.join("ontology").join("relations").join("core-relations.jsonl"); let mut stats = OntologyImportStats::default(); @@ -76,11 +74,9 @@ fn import_entity_file(conn: &Connection, path: &Path) -> Result<(usize, usize)> let metadata = serde_json::to_string(&entity).unwrap_or_default(); let now = chrono::Utc::now().to_rfc3339(); let existing: Option = conn - .query_row( - "SELECT id FROM entities WHERE id = ?1", - rusqlite::params![entity_id], - |row| row.get(0), - ) + .query_row("SELECT id FROM entities WHERE id = ?1", rusqlite::params![entity_id], |row| { + row.get(0) + }) .ok(); conn.execute( @@ -141,18 +137,14 @@ fn import_relations_file(conn: &Connection, path: &Path) -> Result<(usize, usize // Skip relations referencing non-existent entities (FK constraint) let from_exists: bool = conn - .query_row( - "SELECT 1 FROM entities WHERE id = ?1", - rusqlite::params![from_id], - |_| Ok(true), - ) + .query_row("SELECT 1 FROM entities WHERE id = ?1", rusqlite::params![from_id], |_| { + Ok(true) + }) .unwrap_or(false); let to_exists: bool = conn - .query_row( - "SELECT 1 FROM entities WHERE id = ?1", - rusqlite::params![to_id], - |_| Ok(true), - ) + .query_row("SELECT 1 FROM entities WHERE id = ?1", rusqlite::params![to_id], |_| { + Ok(true) + }) .unwrap_or(false); if !from_exists || !to_exists { tracing::warn!("Skipping relation {}: from or to entity not found", relation_id); diff --git a/src/skill_runtime/sources.rs b/src/skill_runtime/sources.rs index 72d1390..623979c 100644 --- a/src/skill_runtime/sources.rs +++ b/src/skill_runtime/sources.rs @@ -80,11 +80,8 @@ impl SkillSource for GitHubSource { // 
Handle directories by recursing (Box::pin required for recursive async fn) if entry_type == "dir" { - let dir_source = GitHubSource::new( - &self.owner, - &self.repo, - &format!("{}/{}", self.path, name), - ); + let dir_source = + GitHubSource::new(&self.owner, &self.repo, &format!("{}/{}", self.path, name)); let fut = Box::pin(dir_source.fetch()); if let Ok(dir_skills) = fut.await { skills.extend(dir_skills); @@ -169,11 +166,7 @@ fn scan_dir_for_skills(dir: &Path, skills: &mut Vec) -> anyhow::Resul scan_dir_for_skills(&path, skills)?; } else if path.extension().map_or(false, |e| e == "md") { let content = std::fs::read_to_string(&path)?; - let name = path - .file_stem() - .unwrap_or_default() - .to_string_lossy() - .to_string(); + let name = path.file_stem().unwrap_or_default().to_string_lossy().to_string(); let skill_id = name.to_lowercase().replace('_', "-"); let skill_meta = if content.contains("---") { parse_skill_or_extract(&content, &skill_id, "") @@ -262,13 +255,7 @@ fn extract_tags(content: &str) -> Vec { if let Some(tags_str) = trimmed.strip_prefix("tags:") { return tags_str .split(',') - .map(|t| { - t.trim() - .trim_matches('"') - .trim_matches('[') - .trim_matches(']') - .to_string() - }) + .map(|t| t.trim().trim_matches('"').trim_matches('[').trim_matches(']').to_string()) .filter(|t| !t.is_empty()) .collect(); } diff --git a/src/vault/scanner.rs b/src/vault/scanner.rs index ba3c1a1..377ae0a 100644 --- a/src/vault/scanner.rs +++ b/src/vault/scanner.rs @@ -1,187 +1,192 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2026 juice094 -use crate::registry::VaultNote; -use crate::vault::frontmatter::extract_frontmatter; -use crate::vault::wikilink::extract_wikilinks; - -use chrono::Utc; -use std::path::{Path, PathBuf}; -use tracing::{info, warn}; - -fn default_vault_dir() -> anyhow::Result { - let ws = crate::registry::WorkspaceRegistry::workspace_dir()?; - let vault = ws.join("vault"); - // P1-2: PARA directory structure - for sub in &["00-Inbox", 
"01-Projects", "02-Areas", "03-Resources", "04-Archives", "99-Meta"] { - std::fs::create_dir_all(vault.join(sub))?; - } - Ok(vault) -} - -/// Options for scanning vault directories. -#[derive(Debug, Clone)] -pub struct ScanOptions { - pub roots: Vec, - pub follow_links: bool, -} - -impl Default for ScanOptions { - fn default() -> Self { - Self { - roots: vec![], - follow_links: true, - } - } -} - -/// Scan vault directories for Markdown notes and sync them into the registry. -/// -/// * `options` — scan options (roots, follow_links). If roots is empty, uses the default vault location. -/// * Returns the number of notes synced. -pub fn scan_vault_with_options( - conn: &mut rusqlite::Connection, - options: &ScanOptions, -) -> anyhow::Result { - let roots = if options.roots.is_empty() { - vec![default_vault_dir()?] - } else { - options.roots.clone() - }; - - let mut synced = 0; - - let multi_root = roots.len() > 1; - - for root in &roots { - if !root.exists() { - info!("Vault root does not exist yet: {:?}", root); - continue; - } - - let walker = walkdir::WalkDir::new(root) - .follow_links(options.follow_links); - - for entry in walker - .into_iter() - .filter_map(|e| e.ok()) - .filter(|e| e.file_type().is_file()) - .filter(|e| e.path().extension().map(|ext| ext == "md").unwrap_or(false)) - { - let path = entry.path(); - let rel_path = path.strip_prefix(root).unwrap_or(path); - let id = if multi_root { - format!("{}/{}", root.file_name().unwrap_or_default().to_string_lossy(), - rel_path.to_string_lossy().replace('\\', "/")) - } else { - rel_path.to_string_lossy().replace('\\', "/") - }; - - match std::fs::read_to_string(path) { - Ok(content) => { - let (frontmatter, body_offset) = extract_frontmatter(&content) - .map(|(fm, off)| (Some(fm), off)) - .unwrap_or((None, 0)); - - let body = &content[body_offset..]; - let wikilinks = extract_wikilinks(body); - let outgoing: Vec = wikilinks.iter().map(|l| l.target.clone()).collect(); - let block_refs: Vec = - 
wikilinks.iter().filter_map(|l| l.anchor.clone()).collect(); - - let title = frontmatter.as_ref().and_then(|fm| fm.title.clone()).or_else(|| { - // Fallback: first H1 heading - body.lines() - .find_map(|l| l.trim().strip_prefix("# ").map(|s| s.trim().to_string())) - }); - - let tags = frontmatter.as_ref().map(|fm| fm.tags.clone()).unwrap_or_default(); - let linked_repo = frontmatter.as_ref().and_then(|fm| fm.repo.clone()); - let fm_raw = frontmatter.map(|fm| fm.raw); - - let note = VaultNote { - id, - path: path.to_string_lossy().to_string(), - title, - content: body.trim().to_string(), - frontmatter: fm_raw, - tags, - outgoing_links: outgoing, - block_refs, - linked_repo, - created_at: Utc::now(), - updated_at: Utc::now(), - }; - - if let Err(e) = crate::registry::vault::save_vault_note(conn, &note) { - warn!("Failed to save vault note {}: {}", note.id, e); - } else { - synced += 1; - } - } - Err(e) => { - warn!("Failed to read vault file {:?}: {}", path, e); - } - } - } - } - - info!("Vault scan complete: {} notes synced", synced); - Ok(synced) -} - -/// Legacy API: scan a single vault directory. -/// -/// * `vault_dir` — root of the vault. If `None`, uses the default location. -/// * Returns the number of notes synced. 
-pub fn scan_vault( - conn: &mut rusqlite::Connection, - vault_dir: Option<&Path>, -) -> anyhow::Result { - let options = ScanOptions { - roots: vault_dir.map(|p| vec![p.to_path_buf()]).unwrap_or_default(), - follow_links: false, - }; - scan_vault_with_options(conn, &options) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::registry::WorkspaceRegistry; - - #[test] - fn test_scan_vault_basic() { - let tmp = std::env::temp_dir().join(format!("devbase_vault_scan_{}", std::process::id())); - std::fs::create_dir_all(&tmp).unwrap(); - std::fs::write( - tmp.join("hello.md"), - "---\ntitle: Hello World\ntags: [rust, cli]\n---\n# Hello World\n\nThis is a [[test]] note.\n", - ) - .unwrap(); - - let mut conn = WorkspaceRegistry::init_in_memory().unwrap(); - let count = scan_vault(&mut conn, Some(&tmp)).unwrap(); - assert_eq!(count, 1); - - std::fs::remove_dir_all(&tmp).unwrap(); - } - - #[test] - fn test_scan_vault_empty_dir() { - let tmp = std::env::temp_dir().join(format!("devbase_vault_empty_{}", std::process::id())); - std::fs::create_dir_all(&tmp).unwrap(); - let mut conn = WorkspaceRegistry::init_in_memory().unwrap(); - let count = scan_vault(&mut conn, Some(&tmp)).unwrap(); - assert_eq!(count, 0); - std::fs::remove_dir_all(&tmp).unwrap(); - } - - #[test] - fn test_scan_vault_missing_dir() { - let tmp = - std::env::temp_dir().join(format!("devbase_vault_missing_{}", std::process::id())); - let mut conn = WorkspaceRegistry::init_in_memory().unwrap(); - let count = scan_vault(&mut conn, Some(&tmp)).unwrap(); - assert_eq!(count, 0); - } -} +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 +use crate::registry::VaultNote; +use crate::vault::frontmatter::extract_frontmatter; +use crate::vault::wikilink::extract_wikilinks; + +use chrono::Utc; +use std::path::{Path, PathBuf}; +use tracing::{info, warn}; + +fn default_vault_dir() -> anyhow::Result { + let ws = crate::registry::WorkspaceRegistry::workspace_dir()?; + let vault = ws.join("vault"); + // 
P1-2: PARA directory structure + for sub in &["00-Inbox", "01-Projects", "02-Areas", "03-Resources", "04-Archives", "99-Meta"] { + std::fs::create_dir_all(vault.join(sub))?; + } + Ok(vault) +} + +/// Options for scanning vault directories. +#[derive(Debug, Clone)] +pub struct ScanOptions { + pub roots: Vec, + pub follow_links: bool, +} + +impl Default for ScanOptions { + fn default() -> Self { + Self { + roots: vec![], + follow_links: true, + } + } +} + +/// Scan vault directories for Markdown notes and sync them into the registry. +/// +/// * `options` — scan options (roots, follow_links). If roots is empty, uses the default vault location. +/// * Returns the number of notes synced. +pub fn scan_vault_with_options( + conn: &mut rusqlite::Connection, + options: &ScanOptions, +) -> anyhow::Result { + let roots = if options.roots.is_empty() { + vec![default_vault_dir()?] + } else { + options.roots.clone() + }; + + let mut synced = 0; + + let multi_root = roots.len() > 1; + + for root in &roots { + if !root.exists() { + info!("Vault root does not exist yet: {:?}", root); + continue; + } + + let walker = walkdir::WalkDir::new(root).follow_links(options.follow_links); + + for entry in walker + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| e.file_type().is_file()) + .filter(|e| e.path().extension().map(|ext| ext == "md").unwrap_or(false)) + { + let path = entry.path(); + let rel_path = path.strip_prefix(root).unwrap_or(path); + let id = if multi_root { + format!( + "{}/{}", + root.file_name().unwrap_or_default().to_string_lossy(), + rel_path.to_string_lossy().replace('\\', "/") + ) + } else { + rel_path.to_string_lossy().replace('\\', "/") + }; + + match std::fs::read_to_string(path) { + Ok(content) => { + let (frontmatter, body_offset) = extract_frontmatter(&content) + .map(|(fm, off)| (Some(fm), off)) + .unwrap_or((None, 0)); + + let body = &content[body_offset..]; + let wikilinks = extract_wikilinks(body); + let outgoing: Vec = + wikilinks.iter().map(|l| 
l.target.clone()).collect(); + let block_refs: Vec = + wikilinks.iter().filter_map(|l| l.anchor.clone()).collect(); + + let title = + frontmatter.as_ref().and_then(|fm| fm.title.clone()).or_else(|| { + // Fallback: first H1 heading + body.lines().find_map(|l| { + l.trim().strip_prefix("# ").map(|s| s.trim().to_string()) + }) + }); + + let tags = frontmatter.as_ref().map(|fm| fm.tags.clone()).unwrap_or_default(); + let linked_repo = frontmatter.as_ref().and_then(|fm| fm.repo.clone()); + let fm_raw = frontmatter.map(|fm| fm.raw); + + let note = VaultNote { + id, + path: path.to_string_lossy().to_string(), + title, + content: body.trim().to_string(), + frontmatter: fm_raw, + tags, + outgoing_links: outgoing, + block_refs, + linked_repo, + created_at: Utc::now(), + updated_at: Utc::now(), + }; + + if let Err(e) = crate::registry::vault::save_vault_note(conn, &note) { + warn!("Failed to save vault note {}: {}", note.id, e); + } else { + synced += 1; + } + } + Err(e) => { + warn!("Failed to read vault file {:?}: {}", path, e); + } + } + } + } + + info!("Vault scan complete: {} notes synced", synced); + Ok(synced) +} + +/// Legacy API: scan a single vault directory. +/// +/// * `vault_dir` — root of the vault. If `None`, uses the default location. +/// * Returns the number of notes synced. 
+pub fn scan_vault( + conn: &mut rusqlite::Connection, + vault_dir: Option<&Path>, +) -> anyhow::Result { + let options = ScanOptions { + roots: vault_dir.map(|p| vec![p.to_path_buf()]).unwrap_or_default(), + follow_links: false, + }; + scan_vault_with_options(conn, &options) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::registry::WorkspaceRegistry; + + #[test] + fn test_scan_vault_basic() { + let tmp = std::env::temp_dir().join(format!("devbase_vault_scan_{}", std::process::id())); + std::fs::create_dir_all(&tmp).unwrap(); + std::fs::write( + tmp.join("hello.md"), + "---\ntitle: Hello World\ntags: [rust, cli]\n---\n# Hello World\n\nThis is a [[test]] note.\n", + ) + .unwrap(); + + let mut conn = WorkspaceRegistry::init_in_memory().unwrap(); + let count = scan_vault(&mut conn, Some(&tmp)).unwrap(); + assert_eq!(count, 1); + + std::fs::remove_dir_all(&tmp).unwrap(); + } + + #[test] + fn test_scan_vault_empty_dir() { + let tmp = std::env::temp_dir().join(format!("devbase_vault_empty_{}", std::process::id())); + std::fs::create_dir_all(&tmp).unwrap(); + let mut conn = WorkspaceRegistry::init_in_memory().unwrap(); + let count = scan_vault(&mut conn, Some(&tmp)).unwrap(); + assert_eq!(count, 0); + std::fs::remove_dir_all(&tmp).unwrap(); + } + + #[test] + fn test_scan_vault_missing_dir() { + let tmp = + std::env::temp_dir().join(format!("devbase_vault_missing_{}", std::process::id())); + let mut conn = WorkspaceRegistry::init_in_memory().unwrap(); + let count = scan_vault(&mut conn, Some(&tmp)).unwrap(); + assert_eq!(count, 0); + } +} From fe3574517f96ff3a8bc93c60ed57e98d689e6f96 Mon Sep 17 00:00:00 2001 From: juice094 Date: Sat, 13 Jun 2026 22:43:03 +0800 Subject: [PATCH 11/11] fix: resolve clippy warnings and production unwrap in skill import - Collapse nested if statements flagged by clippy::collapsible_if - Replace map_or(false, ...) with is_some_and(...) 
- Move parse_github_url before test module in src/commands/skill.rs - Replace Runtime::new().unwrap() with context-propagating error handling --- .../src/frontmatter.rs | 16 ++--- src/commands/skill.rs | 60 +++++++++---------- src/mcp/mod.rs | 12 ++-- src/mcp/tools/skill_sync.rs | 17 +++--- src/registry/import_ontology.rs | 2 +- src/skill_runtime/registry.rs | 8 +-- src/skill_runtime/sources.rs | 2 +- 7 files changed, 58 insertions(+), 59 deletions(-) diff --git a/crates/devbase-skill-runtime-parser/src/frontmatter.rs b/crates/devbase-skill-runtime-parser/src/frontmatter.rs index 233096d..419169d 100644 --- a/crates/devbase-skill-runtime-parser/src/frontmatter.rs +++ b/crates/devbase-skill-runtime-parser/src/frontmatter.rs @@ -54,10 +54,10 @@ pub fn parse_skill_frontmatter(raw: &str) -> SkillFrontmatter { match current_section { Some("inputs") => { // Flush previous input if we see a new "- name:" without closing the last one - if item.starts_with("name:") { - if let Some(input) = current_input.take() { - fm.inputs.push(input); - } + if item.starts_with("name:") + && let Some(input) = current_input.take() + { + fm.inputs.push(input); } if current_input.is_none() && item.starts_with("name:") { current_input = Some(SkillInput::default()); @@ -67,10 +67,10 @@ pub fn parse_skill_frontmatter(raw: &str) -> SkillFrontmatter { } } Some("outputs") => { - if item.starts_with("name:") { - if let Some(output) = current_output.take() { - fm.outputs.push(output); - } + if item.starts_with("name:") + && let Some(output) = current_output.take() + { + fm.outputs.push(output); } if current_output.is_none() && item.starts_with("name:") { current_output = Some(SkillOutput::default()); diff --git a/src/commands/skill.rs b/src/commands/skill.rs index 2f73911..2b26b4b 100644 --- a/src/commands/skill.rs +++ b/src/commands/skill.rs @@ -1,5 +1,6 @@ // SPDX-License-Identifier: MIT // Copyright (c) 2026 juice094 +use anyhow::Context; use devbase::*; use skill_runtime::{parser, registry}; @@ 
-488,6 +489,35 @@ pub fn run_skill( Ok(()) } +fn parse_github_url(url: &str) -> anyhow::Result<(String, String)> { + let url = url.trim_end_matches(".git"); + if let Some(rest) = url.strip_prefix("https://github.com/") { + let parts: Vec<&str> = rest.split('/').collect(); + if parts.len() >= 2 { + return Ok((parts[0].to_string(), parts[1].to_string())); + } + } + if let Some(rest) = url.strip_prefix("http://github.com/") { + let parts: Vec<&str> = rest.split('/').collect(); + if parts.len() >= 2 { + return Ok((parts[0].to_string(), parts[1].to_string())); + } + } + if let Some((owner, repo)) = url.split_once('/') + && !owner.is_empty() + && !repo.is_empty() + && !owner.contains("://") + && !owner.contains('\\') + && !owner.contains(' ') + { + return Ok((owner.to_string(), repo.to_string())); + } + Err(anyhow::anyhow!( + "Could not parse GitHub URL: {}. Expected format: owner/repo or https://github.com/owner/repo", + url + )) +} + #[cfg(test)] mod tests { use super::*; @@ -555,33 +585,3 @@ mod tests { assert!(result.is_ok()); } } - -fn parse_github_url(url: &str) -> anyhow::Result<(String, String)> { - let url = url.trim_end_matches(".git"); - if let Some(rest) = url.strip_prefix("https://github.com/") { - let parts: Vec<&str> = rest.split('/').collect(); - if parts.len() >= 2 { - return Ok((parts[0].to_string(), parts[1].to_string())); - } - } - if let Some(rest) = url.strip_prefix("http://github.com/") { - let parts: Vec<&str> = rest.split('/').collect(); - if parts.len() >= 2 { - return Ok((parts[0].to_string(), parts[1].to_string())); - } - } - if let Some((owner, repo)) = url.split_once('/') { - if !owner.is_empty() - && !repo.is_empty() - && !owner.contains("://") - && !owner.contains('\\') - && !owner.contains(' ') - { - return Ok((owner.to_string(), repo.to_string())); - } - } - Err(anyhow::anyhow!( - "Could not parse GitHub URL: {}. 
Expected format: owner/repo or https://github.com/owner/repo", - url - )) -} diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index d39c255..7d1009d 100644 --- a/src/mcp/mod.rs +++ b/src/mcp/mod.rs @@ -491,12 +491,12 @@ fn append_mcp_oplog(tool_name: &str, duration_ms: u128, success: bool, error_typ "error_type": error_type, }); - if let Ok(mut guard) = get_oplog_file().lock() { - if let Some(ref mut file) = *guard { - use std::io::Write; - if let Err(e) = writeln!(file, "{}", entry) { - tracing::warn!("Failed to write MCP oplog: {}", e); - } + if let Ok(mut guard) = get_oplog_file().lock() + && let Some(ref mut file) = *guard + { + use std::io::Write; + if let Err(e) = writeln!(file, "{}", entry) { + tracing::warn!("Failed to write MCP oplog: {}", e); } } } diff --git a/src/mcp/tools/skill_sync.rs b/src/mcp/tools/skill_sync.rs index 47db2da..5daefb7 100644 --- a/src/mcp/tools/skill_sync.rs +++ b/src/mcp/tools/skill_sync.rs @@ -140,15 +140,14 @@ fn parse_github_url(url: &str) -> anyhow::Result<(String, String)> { } } // Bare owner/repo format - if let Some((owner, repo)) = url.split_once('/') { - if !owner.is_empty() - && !repo.is_empty() - && !owner.contains("://") - && !owner.contains('\\') - && !owner.contains(' ') - { - return Ok((owner.to_string(), repo.to_string())); - } + if let Some((owner, repo)) = url.split_once('/') + && !owner.is_empty() + && !repo.is_empty() + && !owner.contains("://") + && !owner.contains('\\') + && !owner.contains(' ') + { + return Ok((owner.to_string(), repo.to_string())); } Err(anyhow::anyhow!( "Could not parse GitHub URL: {}. 
Expected format: owner/repo or https://github.com/owner/repo", diff --git a/src/registry/import_ontology.rs b/src/registry/import_ontology.rs index 38cb1f2..9636db4 100644 --- a/src/registry/import_ontology.rs +++ b/src/registry/import_ontology.rs @@ -27,7 +27,7 @@ pub fn import_ontology(conn: &Connection, workspace_path: &Path) -> Result { stats.entities_added += added; diff --git a/src/skill_runtime/registry.rs b/src/skill_runtime/registry.rs index 4216bcf..e1aef9c 100644 --- a/src/skill_runtime/registry.rs +++ b/src/skill_runtime/registry.rs @@ -218,10 +218,10 @@ pub fn search_skills_text( category: Option<&str>, ) -> anyhow::Result> { // Try FTS5 first with BM25 ranking - if let Ok(results) = search_skills_fts5(conn, query, limit, category) { - if !results.is_empty() { - return Ok(results); - } + if let Ok(results) = search_skills_fts5(conn, query, limit, category) + && !results.is_empty() + { + return Ok(results); } // Fallback to LIKE for empty results or FTS5 errors search_skills_like(conn, query, limit, category) diff --git a/src/skill_runtime/sources.rs b/src/skill_runtime/sources.rs index 623979c..a819cb3 100644 --- a/src/skill_runtime/sources.rs +++ b/src/skill_runtime/sources.rs @@ -164,7 +164,7 @@ fn scan_dir_for_skills(dir: &Path, skills: &mut Vec) -> anyhow::Resul let path = entry.path(); if path.is_dir() { scan_dir_for_skills(&path, skills)?; - } else if path.extension().map_or(false, |e| e == "md") { + } else if path.extension().is_some_and(|e| e == "md") { let content = std::fs::read_to_string(&path)?; let name = path.file_stem().unwrap_or_default().to_string_lossy().to_string(); let skill_id = name.to_lowercase().replace('_', "-");