From 75e46620b4ce84a0a712260e2eecf70370223c4d Mon Sep 17 00:00:00 2001 From: Robert Date: Thu, 22 Jan 2026 20:17:43 -0500 Subject: [PATCH 1/2] test: add extended tool tests for bash, todo, undo, diff, and git Add comprehensive test coverage for additional tools: - Bash tool: command execution, stderr, exit codes, timeout, dangerous command warnings - Todo tools: add, update, remove, clear, read, filter by status - Undo tool: backup creation, restoration, listing - Diff tool: file comparison, content comparison, identical detection - Git tools: status, diff, commit with auto-generated and custom messages Increases test count from 83 to 116 tests. Co-Authored-By: Claude Opus 4.5 --- test/tools-extended.test.ts | 389 ++++++++++++++++++++++++++++++++++++ 1 file changed, 389 insertions(+) create mode 100644 test/tools-extended.test.ts diff --git a/test/tools-extended.test.ts b/test/tools-extended.test.ts new file mode 100644 index 0000000..c2a4079 --- /dev/null +++ b/test/tools-extended.test.ts @@ -0,0 +1,389 @@ +import { describe, it, before, after, beforeEach } from 'node:test'; +import assert from 'node:assert'; +import { writeFileSync, mkdirSync, rmSync, existsSync, readFileSync } from 'fs'; +import { join } from 'path'; +import { bashTool } from '../src/tools/bash.js'; +import { todoWriteTool, todoReadTool, resetTodos, getTodos } from '../src/tools/todo.js'; +import { undoTool, createBackup, restoreBackup, listBackups } from '../src/tools/undo.js'; +import { diffTool } from '../src/tools/diff.js'; +import { gitStatusTool, gitDiffTool, gitCommitTool } from '../src/tools/git.js'; +import { spawnSync } from 'child_process'; + +const TEST_DIR = join(process.cwd(), 'test-tmp-extended'); +const GIT_TEST_DIR = join(process.cwd(), 'test-tmp-git'); + +describe('Bash Tool', () => { + it('should execute simple commands', async () => { + const result = await bashTool.execute({ command: 'echo "hello world"' }); + assert.ok(result.includes('hello world')); + }); + + it('should capture stderr', async () => { + const result = await bashTool.execute({ command: 'echo "error" >&2' }); + assert.ok(result.includes('stderr')); + assert.ok(result.includes('error')); + }); + + it('should return exit code on failure', async () => { + const result = await bashTool.execute({ command: 'exit 42' }); + assert.ok(result.includes('Exit code: 42')); + }); + + it('should handle command not found', async () => { + const result = await bashTool.execute({ command: 'nonexistent_command_12345' }); + assert.ok(result.includes('not found') || result.includes('Exit code')); + }); + + it('should respect timeout', async () => { + const result = await bashTool.execute({ + command: 'sleep 10', + timeout: 100, + }); + assert.ok(result.includes('timeout') || result.includes('killed')); + }); + + it('should warn about dangerous commands', async () => { + // Safe command with rm pattern in echo (shouldn't trigger warning) + const _safeResult = await bashTool.execute({ command: 'echo "rm -rf /" && echo safe' }); + // The actual rm -rf / pattern triggers warning + const dangerousResult = await bashTool.execute({ command: 'rm -rf /' }); + assert.ok(dangerousResult.includes('WARNING') || dangerousResult.includes('dangerous')); + }); +}); + +describe('Todo Tools', () => { + beforeEach(() => { + resetTodos(); + }); + + it('should add a todo', async () => { + const result = await todoWriteTool.execute({ + action: 'add', + content: 'Test task', + }); + assert.ok(result.includes('Added')); + assert.ok(result.includes('Test task')); + + const todos = getTodos(); + assert.strictEqual(todos.length, 1); + assert.strictEqual(todos[0].content, 'Test task'); + assert.strictEqual(todos[0].status, 'pending'); + }); + + it('should require content for add action', async () => { + const result = await todoWriteTool.execute({ action: 'add' }); + assert.ok(result.includes('Error')); + assert.ok(result.includes('content')); + }); + + it('should update a todo status', async () => { + await todoWriteTool.execute({ action: 'add', content: 'Task 1' }); + + const result = await todoWriteTool.execute({ + action: 'update', + id: 1, + status: 'in_progress', + }); + assert.ok(result.includes('Updated')); + + const todos = getTodos(); + assert.strictEqual(todos[0].status, 'in_progress'); + }); + + it('should update todo content', async () => { + await todoWriteTool.execute({ action: 'add', content: 'Original' }); + + await todoWriteTool.execute({ + action: 'update', + id: 1, + content: 'Updated content', + }); + + const todos = getTodos(); + assert.strictEqual(todos[0].content, 'Updated content'); + }); + + it('should remove a todo', async () => { + await todoWriteTool.execute({ action: 'add', content: 'Task 1' }); + await todoWriteTool.execute({ action: 'add', content: 'Task 2' }); + + const result = await todoWriteTool.execute({ action: 'remove', id: 1 }); + assert.ok(result.includes('Removed')); + + const todos = getTodos(); + assert.strictEqual(todos.length, 1); + assert.strictEqual(todos[0].content, 'Task 2'); + }); + + it('should clear all todos', async () => { + await todoWriteTool.execute({ action: 'add', content: 'Task 1' }); + await todoWriteTool.execute({ action: 'add', content: 'Task 2' }); + + const result = await todoWriteTool.execute({ action: 'clear' }); + assert.ok(result.includes('Cleared')); + assert.ok(result.includes('2')); + + const todos = getTodos(); + assert.strictEqual(todos.length, 0); + }); + + it('should read all todos', async () => { + await todoWriteTool.execute({ action: 'add', content: 'Task 1' }); + await todoWriteTool.execute({ action: 'add', content: 'Task 2' }); + + const result = await todoReadTool.execute({}); + assert.ok(result.includes('Task 1')); + assert.ok(result.includes('Task 2')); + }); + + it('should filter todos by status', async () => { + await todoWriteTool.execute({ action: 'add', content: 'Pending task' }); + await todoWriteTool.execute({ action: 'add', content: 'Completed task' }); + await todoWriteTool.execute({ action: 'update', id: 2, status: 'completed' }); + + const pending = await todoReadTool.execute({ filter: 'pending' }); + assert.ok(pending.includes('Pending task')); + assert.ok(!pending.includes('Completed task')); + + const completed = await todoReadTool.execute({ filter: 'completed' }); + assert.ok(completed.includes('Completed task')); + assert.ok(!completed.includes('Pending task')); + }); + + it('should return empty message when no todos', async () => { + const result = await todoReadTool.execute({}); + assert.ok(result.includes('No todos')); + }); +}); + +describe('Undo Tool', () => { + before(() => { + if (!existsSync(TEST_DIR)) { + mkdirSync(TEST_DIR, { recursive: true }); + } + }); + + after(() => { + if (existsSync(TEST_DIR)) { + rmSync(TEST_DIR, { recursive: true, force: true }); + } + }); + + it('should create backup of existing file', () => { + const testFile = join(TEST_DIR, 'backup-test.txt'); + writeFileSync(testFile, 'original content'); + + const backupPath = createBackup(testFile, 'edit'); + assert.ok(backupPath !== null); + assert.ok(existsSync(backupPath!)); + + const backupContent = readFileSync(backupPath!, 'utf-8'); + assert.strictEqual(backupContent, 'original content'); + }); + + it('should return null for non-existent file', () => { + const backupPath = createBackup(join(TEST_DIR, 'nonexistent.txt'), 'write'); + assert.strictEqual(backupPath, null); + }); + + it('should restore from backup', () => { + const testFile = join(TEST_DIR, 'restore-test.txt'); + writeFileSync(testFile, 'original'); + + createBackup(testFile, 'edit'); + writeFileSync(testFile, 'modified'); + + const result = restoreBackup(testFile); + assert.ok(result.success); + + const content = readFileSync(testFile, 'utf-8'); + assert.strictEqual(content, 'original'); + }); + + it('should list available backups', () => { + const testFile = join(TEST_DIR, 'list-test.txt'); + writeFileSync(testFile, 'content'); + + createBackup(testFile, 'edit'); + + const backups = listBackups(testFile); + assert.ok(backups.length > 0); + assert.strictEqual(backups[0].originalPath, testFile); + }); + + it('should list all backups via undo tool', async () => { + const testFile = join(TEST_DIR, 'undo-list-test.txt'); + writeFileSync(testFile, 'content'); + createBackup(testFile, 'write'); + + const result = await undoTool.execute({}); + assert.ok(result.includes('Available backups') || result.includes('backups')); + }); +}); + +describe('Diff Tool', () => { + before(() => { + if (!existsSync(TEST_DIR)) { + mkdirSync(TEST_DIR, { recursive: true }); + } + }); + + after(() => { + if (existsSync(TEST_DIR)) { + rmSync(TEST_DIR, { recursive: true, force: true }); + } + }); + + it('should compare two files', async () => { + const file1 = join(TEST_DIR, 'diff1.txt'); + const file2 = join(TEST_DIR, 'diff2.txt'); + + writeFileSync(file1, 'line 1\nline 2\nline 3'); + writeFileSync(file2, 'line 1\nline 2 modified\nline 3'); + + const result = await diffTool.execute({ + file1: file1, + file2: file2, + }); + + assert.ok(result.includes('line 2') || result.includes('modified')); + }); + + it('should compare content strings', async () => { + const result = await diffTool.execute({ + content1: 'hello\nworld', + content2: 'hello\nuniverse', + }); + + assert.ok(result.includes('world') || result.includes('universe')); + }); + + it('should handle identical content', async () => { + const result = await diffTool.execute({ + content1: 'same content', + content2: 'same content', + }); + + assert.ok(result.includes('identical') || result.includes('No diff') || result === ''); + }); +}); + +describe('Git Tools', () => { + const originalCwd = process.cwd(); + + before(() => { + // Create a fresh test directory with git repo + if (existsSync(GIT_TEST_DIR)) { + rmSync(GIT_TEST_DIR, { recursive: true, force: true }); + } + mkdirSync(GIT_TEST_DIR, { recursive: true }); + + // Initialize git repo + spawnSync('git', ['init'], { cwd: GIT_TEST_DIR }); + spawnSync('git', ['config', 'user.email', 'test@test.com'], { cwd: GIT_TEST_DIR }); + spawnSync('git', ['config', 'user.name', 'Test User'], { cwd: GIT_TEST_DIR }); + + // Create initial commit + writeFileSync(join(GIT_TEST_DIR, 'README.md'), '# Test Repo\n'); + spawnSync('git', ['add', '.'], { cwd: GIT_TEST_DIR }); + spawnSync('git', ['commit', '-m', 'Initial commit'], { cwd: GIT_TEST_DIR }); + + // Change to test directory + process.chdir(GIT_TEST_DIR); + }); + + after(() => { + // Restore original directory + process.chdir(originalCwd); + + // Cleanup + if (existsSync(GIT_TEST_DIR)) { + rmSync(GIT_TEST_DIR, { recursive: true, force: true }); + } + }); + + it('should show clean status when no changes', async () => { + const result = await gitStatusTool.execute({}); + assert.ok(result.includes('clean') || result.includes('nothing to commit')); + }); + + it('should show status with short format', async () => { + // Create an untracked file + writeFileSync(join(GIT_TEST_DIR, 'untracked.txt'), 'test'); + + const result = await gitStatusTool.execute({ short: true }); + assert.ok(result.includes('??') || result.includes('untracked')); + + // Cleanup + rmSync(join(GIT_TEST_DIR, 'untracked.txt')); + }); + + it('should show diff for modified files', async () => { + // Modify a file + writeFileSync(join(GIT_TEST_DIR, 'README.md'), '# Test Repo\n\nUpdated content\n'); + + const result = await gitDiffTool.execute({}); + assert.ok(result.includes('Updated') || result.includes('README')); + + // Reset + spawnSync('git', ['checkout', 'README.md'], { cwd: GIT_TEST_DIR }); + }); + + it('should show diff with stat format', async () => { + // Modify a file + writeFileSync(join(GIT_TEST_DIR, 'README.md'), '# Test Repo\n\nMore content\n'); + + const result = await gitDiffTool.execute({ stat: true }); + assert.ok(result.includes('README') || result.includes('insertion') || result.includes('+')); + + // Reset + spawnSync('git', ['checkout', 'README.md'], { cwd: GIT_TEST_DIR }); + }); + + it('should return no changes when nothing modified', async () => { + const result = await gitDiffTool.execute({}); + assert.ok(result.includes('No changes') || result === ''); + }); + + it('should commit with auto-generated message', async () => { + // Create a new file + writeFileSync(join(GIT_TEST_DIR, 'newfile.txt'), 'new content'); + + const result = await gitCommitTool.execute({ files: 'newfile.txt' }); + assert.ok(result.includes('Committed') || result.includes('newfile')); + }); + + it('should commit with custom message', async () => { + // Create another file + writeFileSync(join(GIT_TEST_DIR, 'another.txt'), 'another file'); + + const result = await gitCommitTool.execute({ + files: 'another.txt', + message: 'feat: add another file', + }); + assert.ok(result.includes('feat: add another file')); + }); + + it('should handle nothing to commit', async () => { + const result = await gitCommitTool.execute({}); + assert.ok(result.includes('Nothing to commit') || result.includes('no staged')); + }); + + it('should reject file paths starting with dash', async () => { + const result = await gitCommitTool.execute({ files: '-rf' }); + assert.ok(result.includes('Error') || result.includes('Invalid')); + }); + + it('should show staged changes only', async () => { + // Create and stage a file + writeFileSync(join(GIT_TEST_DIR, 'staged.txt'), 'staged content'); + spawnSync('git', ['add', 'staged.txt'], { cwd: GIT_TEST_DIR }); + + const result = await gitDiffTool.execute({ staged: true }); + assert.ok(result.includes('staged') || result.includes('+')); + + // Cleanup - unstage + spawnSync('git', ['reset', 'HEAD', 'staged.txt'], { cwd: GIT_TEST_DIR }); + rmSync(join(GIT_TEST_DIR, 'staged.txt')); + }); +}); From 7a2bb659ee2657adf70cd8c04ecf3fc38d9231d9 Mon Sep 17 00:00:00 2001 From: Robert Date: Thu, 22 Jan 2026 20:44:20 -0500 Subject: [PATCH 2/2] fix: resolve 6 high-severity bugs - hooks/executor.ts: Track hook errors in metadata, stop execution for critical hooks (priority < 10) on failure instead of silently continuing - rpc/client.ts: Log JSON parse errors with content preview instead of silently ignoring; add cleanupStaleRequests() to prevent memory leaks from unbounded pendingRequests map - rpc/types.ts: Add createdAt timestamp to PendingRequest for cleanup - skills/parser.ts: Capture full error details (stderr, exit code) for command failures; add security documentation for command execution - background.ts: Add automatic cleanup of completed tasks (>1hr old), cap total tasks at 100 to prevent unbounded memory growth Co-Authored-By: Claude Opus 4.5 --- src/background.ts | 55 +++++++++++++++++++++++++++++++++++++++++++ src/hooks/executor.ts | 24 ++++++++++++++++++- src/rpc/client.ts | 32 ++++++++++++++++++++++--- src/rpc/types.ts | 1 + src/skills/parser.ts | 22 +++++++++++++++-- 5 files changed, 128 insertions(+), 6 deletions(-) diff --git a/src/background.ts b/src/background.ts index e1861ba..9a76382 100644 --- a/src/background.ts +++ b/src/background.ts @@ -18,11 +18,15 @@ export interface BackgroundTask { class BackgroundTaskManager { private tasks = new Map(); private nextId = 1; + private static readonly MAX_TASKS = 100; // Maximum tasks to retain + private static readonly MAX_COMPLETED_AGE_MS = 3600000; // 1 hour /** * Start a new background task */ start(command: string, timeout = 600000): string { + // Auto-cleanup old completed tasks before starting new one + this.autoCleanup(); const id = `bg_${this.nextId++}`; const task: BackgroundTask = { id, @@ -151,6 +155,57 @@ class BackgroundTaskManager { } return count; } + + /** + * Automatic cleanup to prevent unbounded memory growth + * - Removes completed tasks older than MAX_COMPLETED_AGE_MS + * - If still over MAX_TASKS, removes oldest completed tasks + */ + private autoCleanup(): void { + const now = Date.now(); + + // First pass: remove old completed tasks + for (const [id, task] of this.tasks) { + if (task.status !== 'running' && task.completedAt) { + const age = now - task.completedAt.getTime(); + if (age > BackgroundTaskManager.MAX_COMPLETED_AGE_MS) { + this.tasks.delete(id); + } + } + } + + // Second pass: if still over limit, remove oldest completed tasks + if (this.tasks.size >= BackgroundTaskManager.MAX_TASKS) { + const completedTasks = Array.from(this.tasks.entries()) + .filter(([, task]) => task.status !== 'running') + .sort((a, b) => { + const timeA = a[1].completedAt?.getTime() ?? 0; + const timeB = b[1].completedAt?.getTime() ?? 0; + return timeA - timeB; // Oldest first + }); + + // Remove oldest completed tasks to get under limit + const toRemove = this.tasks.size - BackgroundTaskManager.MAX_TASKS + 10; // Leave some room + for (let i = 0; i < Math.min(toRemove, completedTasks.length); i++) { + this.tasks.delete(completedTasks[i][0]); + } + } + } + + /** + * Get task manager statistics + */ + getStats(): { total: number; running: number; completed: number; failed: number } { + let running = 0; + let completed = 0; + let failed = 0; + for (const task of this.tasks.values()) { + if (task.status === 'running') running++; + else if (task.status === 'completed') completed++; + else failed++; + } + return { total: this.tasks.size, running, completed, failed }; + } } // Singleton instance diff --git a/src/hooks/executor.ts b/src/hooks/executor.ts index 3c4a996..57692c7 100644 --- a/src/hooks/executor.ts +++ b/src/hooks/executor.ts @@ -56,7 +56,29 @@ export async function executeHooks( aggregatedResult.metadata = { ...aggregatedResult.metadata, ...result.metadata }; } } catch (error) { - console.error(`Hook ${hook.name} failed:`, error); + const errorMessage = error instanceof Error ? error.message : String(error); + console.error(`Hook ${hook.name} failed:`, errorMessage); + + // Track hook errors in metadata for visibility + aggregatedResult.metadata = { + ...aggregatedResult.metadata, + hookErrors: [ + ...((aggregatedResult.metadata?.hookErrors as string[]) || []), + `${hook.name}: ${errorMessage}`, + ], + }; + + // For critical hooks (priority < 10), stop execution on failure + // Default priority is 100, so only explicitly low-priority hooks are critical + if ((hook.priority ?? 100) < 10) { + return { + continue: false, + metadata: { + ...aggregatedResult.metadata, + criticalHookFailed: hook.name, + }, + }; + } } } diff --git a/src/rpc/client.ts b/src/rpc/client.ts index 14e4203..088b53a 100644 --- a/src/rpc/client.ts +++ b/src/rpc/client.ts @@ -119,11 +119,32 @@ export class RpcClient { } } } - } catch { - // Ignore parse errors + } catch (error) { + // Log parse errors for debugging - malformed JSON from servers is a real issue + const errorMessage = error instanceof Error ? error.message : String(error); + console.error(`[RPC] JSON parse error: ${errorMessage}`); + console.error(`[RPC] Content preview: ${content.slice(0, 100)}...`); } } + /** + * Cleanup stale pending requests (older than maxAge ms) + * Call this periodically to prevent memory leaks + */ + cleanupStaleRequests(maxAge = 300000): number { + const now = Date.now(); + let cleaned = 0; + for (const [id, pending] of this.pendingRequests.entries()) { + if (pending.createdAt && now - pending.createdAt > maxAge) { + if (pending.timeoutId) clearTimeout(pending.timeoutId); + pending.reject(new Error('Request expired during cleanup')); + this.pendingRequests.delete(id); + cleaned++; + } + } + return cleaned; + } + private send(msg: object): void { if (!this.process?.stdin) return; @@ -148,7 +169,12 @@ export class RpcClient { reject(new Error(`Request timeout: ${method}`)); }, this.options.timeout); - this.pendingRequests.set(id, { resolve: resolve as (v: unknown) => void, reject, timeoutId }); + this.pendingRequests.set(id, { + resolve: resolve as (v: unknown) => void, + reject, + timeoutId, + createdAt: Date.now(), + }); this.send(request); }); } diff --git a/src/rpc/types.ts b/src/rpc/types.ts index 9d2832d..f356828 100644 --- a/src/rpc/types.ts +++ b/src/rpc/types.ts @@ -43,4 +43,5 @@ export interface PendingRequest { resolve: (value: unknown) => void; reject: (error: Error) => void; timeoutId?: ReturnType; + createdAt: number; // Timestamp for cleanup of stale requests } diff --git a/src/skills/parser.ts b/src/skills/parser.ts index 594b2c4..404a601 100644 --- a/src/skills/parser.ts +++ b/src/skills/parser.ts @@ -176,6 +176,10 @@ function resolveFileVariable(path: string, errors: string[]): string { /** * Execute a command from a skill template. * Commands in skill files are user-defined and trusted. + * + * SECURITY NOTE: This intentionally executes shell commands from skill files. + * Skills are user-authored and considered trusted content. + * Only use skills from sources you trust. */ function resolveCommandVariable(command: string, errors: string[]): string { if (!command) { @@ -185,14 +189,28 @@ function resolveCommandVariable(command: string, errors: string[]): string { try { // User-defined command from skill file - intentionally executes shell commands + // This is safe because skills are user-authored trusted content const output = execSync(command, { encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'], }); return output.trim(); - } catch { - errors.push(`Command failed: ${command}`); + } catch (error) { + // Capture actual error details for debugging + let errorDetail = 'Unknown error'; + if (error instanceof Error) { + errorDetail = error.message; + // For exec errors, stderr is often in the error object + const execError = error as Error & { stderr?: string; status?: number }; + if (execError.stderr) { + errorDetail = execError.stderr.trim() || errorDetail; + } + if (execError.status !== undefined) { + errorDetail += ` (exit code: ${execError.status})`; + } + } + errors.push(`Command failed: ${command} - ${errorDetail}`); return `[Command failed: ${command}]`; } }