From 75e46620b4ce84a0a712260e2eecf70370223c4d Mon Sep 17 00:00:00 2001
From: Robert <robert.alexander37899@gmail.com>
Date: Thu, 22 Jan 2026 20:17:43 -0500
Subject: [PATCH 1/2] test: add extended tool tests for bash, todo, undo, diff,
 and git

Add comprehensive test coverage for additional tools:
- Bash tool: command execution, stderr, exit codes, timeout, dangerous command warnings
- Todo tools: add, update, remove, clear, read, filter by status
- Undo tool: backup creation, restoration, listing
- Diff tool: file comparison, content comparison, identical detection
- Git tools: status, diff, commit with auto-generated and custom messages

Increases test count from 83 to 116 tests.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 test/tools-extended.test.ts | 389 ++++++++++++++++++++++++++++++++++++
 1 file changed, 389 insertions(+)
 create mode 100644 test/tools-extended.test.ts

diff --git a/test/tools-extended.test.ts b/test/tools-extended.test.ts
new file mode 100644
index 0000000..c2a4079
--- /dev/null
+++ b/test/tools-extended.test.ts
@@ -0,0 +1,389 @@
+import { describe, it, before, after, beforeEach } from 'node:test';
+import assert from 'node:assert';
+import { writeFileSync, mkdirSync, rmSync, existsSync, readFileSync } from 'fs';
+import { join } from 'path';
+import { bashTool } from '../src/tools/bash.js';
+import { todoWriteTool, todoReadTool, resetTodos, getTodos } from '../src/tools/todo.js';
+import { undoTool, createBackup, restoreBackup, listBackups } from '../src/tools/undo.js';
+import { diffTool } from '../src/tools/diff.js';
+import { gitStatusTool, gitDiffTool, gitCommitTool } from '../src/tools/git.js';
+import { spawnSync } from 'child_process';
+
+const TEST_DIR = join(process.cwd(), 'test-tmp-extended');
+const GIT_TEST_DIR = join(process.cwd(), 'test-tmp-git');
+
+describe('Bash Tool', () => {
+  it('should execute simple commands', async () => {
+    const result = await bashTool.execute({ command: 'echo "hello world"' });
+    assert.ok(result.includes('hello world'));
+  });
+
+  it('should capture stderr', async () => {
+    const result = await bashTool.execute({ command: 'echo "error" >&2' });
+    assert.ok(result.includes('stderr'));
+    assert.ok(result.includes('error'));
+  });
+
+  it('should return exit code on failure', async () => {
+    const result = await bashTool.execute({ command: 'exit 42' });
+    assert.ok(result.includes('Exit code: 42'));
+  });
+
+  it('should handle command not found', async () => {
+    const result = await bashTool.execute({ command: 'nonexistent_command_12345' });
+    assert.ok(result.includes('not found') || result.includes('Exit code'));
+  });
+
+  it('should respect timeout', async () => {
+    const result = await bashTool.execute({
+      command: 'sleep 10',
+      timeout: 100,
+    });
+    assert.ok(result.includes('timeout') || result.includes('killed'));
+  });
+
+  it('should warn about dangerous commands', async () => {
+    // Safe command with rm pattern in echo (shouldn't trigger warning)
+    const _safeResult = await bashTool.execute({ command: 'echo "rm -rf /" && echo safe' });
+    // The actual rm -rf / pattern triggers warning
+    const dangerousResult = await bashTool.execute({ command: 'rm -rf /' });
+    assert.ok(dangerousResult.includes('WARNING') || dangerousResult.includes('dangerous'));
+  });
+});
+
+describe('Todo Tools', () => {
+  beforeEach(() => {
+    resetTodos();
+  });
+
+  it('should add a todo', async () => {
+    const result = await todoWriteTool.execute({
+      action: 'add',
+      content: 'Test task',
+    });
+    assert.ok(result.includes('Added'));
+    assert.ok(result.includes('Test task'));
+
+    const todos = getTodos();
+    assert.strictEqual(todos.length, 1);
+    assert.strictEqual(todos[0].content, 'Test task');
+    assert.strictEqual(todos[0].status, 'pending');
+  });
+
+  it('should require content for add action', async () => {
+    const result = await todoWriteTool.execute({ action: 'add' });
+    assert.ok(result.includes('Error'));
+    assert.ok(result.includes('content'));
+  });
+
+  it('should update a todo status', async () => {
+    await todoWriteTool.execute({ action: 'add', content: 'Task 1' });
+
+    const result = await todoWriteTool.execute({
+      action: 'update',
+      id: 1,
+      status: 'in_progress',
+    });
+    assert.ok(result.includes('Updated'));
+
+    const todos = getTodos();
+    assert.strictEqual(todos[0].status, 'in_progress');
+  });
+
+  it('should update todo content', async () => {
+    await todoWriteTool.execute({ action: 'add', content: 'Original' });
+
+    await todoWriteTool.execute({
+      action: 'update',
+      id: 1,
+      content: 'Updated content',
+    });
+
+    const todos = getTodos();
+    assert.strictEqual(todos[0].content, 'Updated content');
+  });
+
+  it('should remove a todo', async () => {
+    await todoWriteTool.execute({ action: 'add', content: 'Task 1' });
+    await todoWriteTool.execute({ action: 'add', content: 'Task 2' });
+
+    const result = await todoWriteTool.execute({ action: 'remove', id: 1 });
+    assert.ok(result.includes('Removed'));
+
+    const todos = getTodos();
+    assert.strictEqual(todos.length, 1);
+    assert.strictEqual(todos[0].content, 'Task 2');
+  });
+
+  it('should clear all todos', async () => {
+    await todoWriteTool.execute({ action: 'add', content: 'Task 1' });
+    await todoWriteTool.execute({ action: 'add', content: 'Task 2' });
+
+    const result = await todoWriteTool.execute({ action: 'clear' });
+    assert.ok(result.includes('Cleared'));
+    assert.ok(result.includes('2'));
+
+    const todos = getTodos();
+    assert.strictEqual(todos.length, 0);
+  });
+
+  it('should read all todos', async () => {
+    await todoWriteTool.execute({ action: 'add', content: 'Task 1' });
+    await todoWriteTool.execute({ action: 'add', content: 'Task 2' });
+
+    const result = await todoReadTool.execute({});
+    assert.ok(result.includes('Task 1'));
+    assert.ok(result.includes('Task 2'));
+  });
+
+  it('should filter todos by status', async () => {
+    await todoWriteTool.execute({ action: 'add', content: 'Pending task' });
+    await todoWriteTool.execute({ action: 'add', content: 'Completed task' });
+    await todoWriteTool.execute({ action: 'update', id: 2, status: 'completed' });
+
+    const pending = await todoReadTool.execute({ filter: 'pending' });
+    assert.ok(pending.includes('Pending task'));
+    assert.ok(!pending.includes('Completed task'));
+
+    const completed = await todoReadTool.execute({ filter: 'completed' });
+    assert.ok(completed.includes('Completed task'));
+    assert.ok(!completed.includes('Pending task'));
+  });
+
+  it('should return empty message when no todos', async () => {
+    const result = await todoReadTool.execute({});
+    assert.ok(result.includes('No todos'));
+  });
+});
+
+describe('Undo Tool', () => {
+  before(() => {
+    if (!existsSync(TEST_DIR)) {
+      mkdirSync(TEST_DIR, { recursive: true });
+    }
+  });
+
+  after(() => {
+    if (existsSync(TEST_DIR)) {
+      rmSync(TEST_DIR, { recursive: true, force: true });
+    }
+  });
+
+  it('should create backup of existing file', () => {
+    const testFile = join(TEST_DIR, 'backup-test.txt');
+    writeFileSync(testFile, 'original content');
+
+    const backupPath = createBackup(testFile, 'edit');
+    assert.ok(backupPath !== null);
+    assert.ok(existsSync(backupPath!));
+
+    const backupContent = readFileSync(backupPath!, 'utf-8');
+    assert.strictEqual(backupContent, 'original content');
+  });
+
+  it('should return null for non-existent file', () => {
+    const backupPath = createBackup(join(TEST_DIR, 'nonexistent.txt'), 'write');
+    assert.strictEqual(backupPath, null);
+  });
+
+  it('should restore from backup', () => {
+    const testFile = join(TEST_DIR, 'restore-test.txt');
+    writeFileSync(testFile, 'original');
+
+    createBackup(testFile, 'edit');
+    writeFileSync(testFile, 'modified');
+
+    const result = restoreBackup(testFile);
+    assert.ok(result.success);
+
+    const content = readFileSync(testFile, 'utf-8');
+    assert.strictEqual(content, 'original');
+  });
+
+  it('should list available backups', () => {
+    const testFile = join(TEST_DIR, 'list-test.txt');
+    writeFileSync(testFile, 'content');
+
+    createBackup(testFile, 'edit');
+
+    const backups = listBackups(testFile);
+    assert.ok(backups.length > 0);
+    assert.strictEqual(backups[0].originalPath, testFile);
+  });
+
+  it('should list all backups via undo tool', async () => {
+    const testFile = join(TEST_DIR, 'undo-list-test.txt');
+    writeFileSync(testFile, 'content');
+    createBackup(testFile, 'write');
+
+    const result = await undoTool.execute({});
+    assert.ok(result.includes('Available backups') || result.includes('backups'));
+  });
+});
+
+describe('Diff Tool', () => {
+  before(() => {
+    if (!existsSync(TEST_DIR)) {
+      mkdirSync(TEST_DIR, { recursive: true });
+    }
+  });
+
+  after(() => {
+    if (existsSync(TEST_DIR)) {
+      rmSync(TEST_DIR, { recursive: true, force: true });
+    }
+  });
+
+  it('should compare two files', async () => {
+    const file1 = join(TEST_DIR, 'diff1.txt');
+    const file2 = join(TEST_DIR, 'diff2.txt');
+
+    writeFileSync(file1, 'line 1\nline 2\nline 3');
+    writeFileSync(file2, 'line 1\nline 2 modified\nline 3');
+
+    const result = await diffTool.execute({
+      file1: file1,
+      file2: file2,
+    });
+
+    assert.ok(result.includes('line 2') || result.includes('modified'));
+  });
+
+  it('should compare content strings', async () => {
+    const result = await diffTool.execute({
+      content1: 'hello\nworld',
+      content2: 'hello\nuniverse',
+    });
+
+    assert.ok(result.includes('world') || result.includes('universe'));
+  });
+
+  it('should handle identical content', async () => {
+    const result = await diffTool.execute({
+      content1: 'same content',
+      content2: 'same content',
+    });
+
+    assert.ok(result.includes('identical') || result.includes('No diff') || result === '');
+  });
+});
+
+describe('Git Tools', () => {
+  const originalCwd = process.cwd();
+
+  before(() => {
+    // Create a fresh test directory with git repo
+    if (existsSync(GIT_TEST_DIR)) {
+      rmSync(GIT_TEST_DIR, { recursive: true, force: true });
+    }
+    mkdirSync(GIT_TEST_DIR, { recursive: true });
+
+    // Initialize git repo
+    spawnSync('git', ['init'], { cwd: GIT_TEST_DIR });
+    spawnSync('git', ['config', 'user.email', 'test@test.com'], { cwd: GIT_TEST_DIR });
+    spawnSync('git', ['config', 'user.name', 'Test User'], { cwd: GIT_TEST_DIR });
+
+    // Create initial commit
+    writeFileSync(join(GIT_TEST_DIR, 'README.md'), '# Test Repo\n');
+    spawnSync('git', ['add', '.'], { cwd: GIT_TEST_DIR });
+    spawnSync('git', ['commit', '-m', 'Initial commit'], { cwd: GIT_TEST_DIR });
+
+    // Change to test directory
+    process.chdir(GIT_TEST_DIR);
+  });
+
+  after(() => {
+    // Restore original directory
+    process.chdir(originalCwd);
+
+    // Cleanup
+    if (existsSync(GIT_TEST_DIR)) {
+      rmSync(GIT_TEST_DIR, { recursive: true, force: true });
+    }
+  });
+
+  it('should show clean status when no changes', async () => {
+    const result = await gitStatusTool.execute({});
+    assert.ok(result.includes('clean') || result.includes('nothing to commit'));
+  });
+
+  it('should show status with short format', async () => {
+    // Create an untracked file
+    writeFileSync(join(GIT_TEST_DIR, 'untracked.txt'), 'test');
+
+    const result = await gitStatusTool.execute({ short: true });
+    assert.ok(result.includes('??') || result.includes('untracked'));
+
+    // Cleanup
+    rmSync(join(GIT_TEST_DIR, 'untracked.txt'));
+  });
+
+  it('should show diff for modified files', async () => {
+    // Modify a file
+    writeFileSync(join(GIT_TEST_DIR, 'README.md'), '# Test Repo\n\nUpdated content\n');
+
+    const result = await gitDiffTool.execute({});
+    assert.ok(result.includes('Updated') || result.includes('README'));
+
+    // Reset
+    spawnSync('git', ['checkout', 'README.md'], { cwd: GIT_TEST_DIR });
+  });
+
+  it('should show diff with stat format', async () => {
+    // Modify a file
+    writeFileSync(join(GIT_TEST_DIR, 'README.md'), '# Test Repo\n\nMore content\n');
+
+    const result = await gitDiffTool.execute({ stat: true });
+    assert.ok(result.includes('README') || result.includes('insertion') || result.includes('+'));
+
+    // Reset
+    spawnSync('git', ['checkout', 'README.md'], { cwd: GIT_TEST_DIR });
+  });
+
+  it('should return no changes when nothing modified', async () => {
+    const result = await gitDiffTool.execute({});
+    assert.ok(result.includes('No changes') || result === '');
+  });
+
+  it('should commit with auto-generated message', async () => {
+    // Create a new file
+    writeFileSync(join(GIT_TEST_DIR, 'newfile.txt'), 'new content');
+
+    const result = await gitCommitTool.execute({ files: 'newfile.txt' });
+    assert.ok(result.includes('Committed') || result.includes('newfile'));
+  });
+
+  it('should commit with custom message', async () => {
+    // Create another file
+    writeFileSync(join(GIT_TEST_DIR, 'another.txt'), 'another file');
+
+    const result = await gitCommitTool.execute({
+      files: 'another.txt',
+      message: 'feat: add another file',
+    });
+    assert.ok(result.includes('feat: add another file'));
+  });
+
+  it('should handle nothing to commit', async () => {
+    const result = await gitCommitTool.execute({});
+    assert.ok(result.includes('Nothing to commit') || result.includes('no staged'));
+  });
+
+  it('should reject file paths starting with dash', async () => {
+    const result = await gitCommitTool.execute({ files: '-rf' });
+    assert.ok(result.includes('Error') || result.includes('Invalid'));
+  });
+
+  it('should show staged changes only', async () => {
+    // Create and stage a file
+    writeFileSync(join(GIT_TEST_DIR, 'staged.txt'), 'staged content');
+    spawnSync('git', ['add', 'staged.txt'], { cwd: GIT_TEST_DIR });
+
+    const result = await gitDiffTool.execute({ staged: true });
+    assert.ok(result.includes('staged') || result.includes('+'));
+
+    // Cleanup - unstage
+    spawnSync('git', ['reset', 'HEAD', 'staged.txt'], { cwd: GIT_TEST_DIR });
+    rmSync(join(GIT_TEST_DIR, 'staged.txt'));
+  });
+});

From 7a2bb659ee2657adf70cd8c04ecf3fc38d9231d9 Mon Sep 17 00:00:00 2001
From: Robert <robert.alexander37899@gmail.com>
Date: Thu, 22 Jan 2026 20:44:20 -0500
Subject: [PATCH 2/2] fix: resolve 6 high-severity bugs

- hooks/executor.ts: Track hook errors in metadata, stop execution for
  critical hooks (priority < 10) on failure instead of silently continuing

- rpc/client.ts: Log JSON parse errors with content preview instead of
  silently ignoring; add cleanupStaleRequests() to prevent memory leaks
  from unbounded pendingRequests map

- rpc/types.ts: Add createdAt timestamp to PendingRequest for cleanup

- skills/parser.ts: Capture full error details (stderr, exit code) for
  command failures; add security documentation for command execution

- background.ts: Add automatic cleanup of completed tasks (>1hr old),
  cap total tasks at 100 to prevent unbounded memory growth

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/background.ts     | 55 +++++++++++++++++++++++++++++++++++++++++++
 src/hooks/executor.ts | 24 ++++++++++++++++++-
 src/rpc/client.ts     | 32 ++++++++++++++++++++++---
 src/rpc/types.ts      |  1 +
 src/skills/parser.ts  | 22 +++++++++++++++--
 5 files changed, 128 insertions(+), 6 deletions(-)

diff --git a/src/background.ts b/src/background.ts
index e1861ba..9a76382 100644
--- a/src/background.ts
+++ b/src/background.ts
@@ -18,11 +18,15 @@ export interface BackgroundTask {
 class BackgroundTaskManager {
   private tasks = new Map<string, BackgroundTask>();
   private nextId = 1;
+  private static readonly MAX_TASKS = 100; // Maximum tasks to retain
+  private static readonly MAX_COMPLETED_AGE_MS = 3600000; // 1 hour
 
   /**
    * Start a new background task
    */
   start(command: string, timeout = 600000): string {
+    // Auto-cleanup old completed tasks before starting new one
+    this.autoCleanup();
     const id = `bg_${this.nextId++}`;
     const task: BackgroundTask = {
       id,
@@ -151,6 +155,57 @@ class BackgroundTaskManager {
     }
     return count;
   }
+
+  /**
+   * Automatic cleanup to prevent unbounded memory growth
+   * - Removes completed tasks older than MAX_COMPLETED_AGE_MS
+   * - If still over MAX_TASKS, removes oldest completed tasks
+   */
+  private autoCleanup(): void {
+    const now = Date.now();
+
+    // First pass: remove old completed tasks
+    for (const [id, task] of this.tasks) {
+      if (task.status !== 'running' && task.completedAt) {
+        const age = now - task.completedAt.getTime();
+        if (age > BackgroundTaskManager.MAX_COMPLETED_AGE_MS) {
+          this.tasks.delete(id);
+        }
+      }
+    }
+
+    // Second pass: if still over limit, remove oldest completed tasks
+    if (this.tasks.size >= BackgroundTaskManager.MAX_TASKS) {
+      const completedTasks = Array.from(this.tasks.entries())
+        .filter(([, task]) => task.status !== 'running')
+        .sort((a, b) => {
+          const timeA = a[1].completedAt?.getTime() ?? 0;
+          const timeB = b[1].completedAt?.getTime() ?? 0;
+          return timeA - timeB; // Oldest first
+        });
+
+      // Remove oldest completed tasks to get under limit
+      const toRemove = this.tasks.size - BackgroundTaskManager.MAX_TASKS + 10; // Leave some room
+      for (let i = 0; i < Math.min(toRemove, completedTasks.length); i++) {
+        this.tasks.delete(completedTasks[i][0]);
+      }
+    }
+  }
+
+  /**
+   * Get task manager statistics
+   */
+  getStats(): { total: number; running: number; completed: number; failed: number } {
+    let running = 0;
+    let completed = 0;
+    let failed = 0;
+    for (const task of this.tasks.values()) {
+      if (task.status === 'running') running++;
+      else if (task.status === 'completed') completed++;
+      else failed++;
+    }
+    return { total: this.tasks.size, running, completed, failed };
+  }
 }
 
 // Singleton instance
diff --git a/src/hooks/executor.ts b/src/hooks/executor.ts
index 3c4a996..57692c7 100644
--- a/src/hooks/executor.ts
+++ b/src/hooks/executor.ts
@@ -56,7 +56,29 @@ export async function executeHooks(
         aggregatedResult.metadata = { ...aggregatedResult.metadata, ...result.metadata };
       }
     } catch (error) {
-      console.error(`Hook ${hook.name} failed:`, error);
+      const errorMessage = error instanceof Error ? error.message : String(error);
+      console.error(`Hook ${hook.name} failed:`, errorMessage);
+
+      // Track hook errors in metadata for visibility
+      aggregatedResult.metadata = {
+        ...aggregatedResult.metadata,
+        hookErrors: [
+          ...((aggregatedResult.metadata?.hookErrors as string[]) || []),
+          `${hook.name}: ${errorMessage}`,
+        ],
+      };
+
+      // For critical hooks (priority < 10), stop execution on failure
+      // Default priority is 100, so only explicitly low-priority hooks are critical
+      if ((hook.priority ?? 100) < 10) {
+        return {
+          continue: false,
+          metadata: {
+            ...aggregatedResult.metadata,
+            criticalHookFailed: hook.name,
+          },
+        };
+      }
     }
   }
 
diff --git a/src/rpc/client.ts b/src/rpc/client.ts
index 14e4203..088b53a 100644
--- a/src/rpc/client.ts
+++ b/src/rpc/client.ts
@@ -119,11 +119,32 @@ export class RpcClient {
           }
         }
       }
-    } catch {
-      // Ignore parse errors
+    } catch (error) {
+      // Log parse errors for debugging - malformed JSON from servers is a real issue
+      const errorMessage = error instanceof Error ? error.message : String(error);
+      console.error(`[RPC] JSON parse error: ${errorMessage}`);
+      console.error(`[RPC] Content preview: ${content.slice(0, 100)}...`);
     }
   }
 
+  /**
+   * Cleanup stale pending requests (older than maxAge ms)
+   * Call this periodically to prevent memory leaks
+   */
+  cleanupStaleRequests(maxAge = 300000): number {
+    const now = Date.now();
+    let cleaned = 0;
+    for (const [id, pending] of this.pendingRequests.entries()) {
+      if (pending.createdAt && now - pending.createdAt > maxAge) {
+        if (pending.timeoutId) clearTimeout(pending.timeoutId);
+        pending.reject(new Error('Request expired during cleanup'));
+        this.pendingRequests.delete(id);
+        cleaned++;
+      }
+    }
+    return cleaned;
+  }
+
   private send(msg: object): void {
     if (!this.process?.stdin) return;
 
@@ -148,7 +169,12 @@ export class RpcClient {
         reject(new Error(`Request timeout: ${method}`));
       }, this.options.timeout);
 
-      this.pendingRequests.set(id, { resolve: resolve as (v: unknown) => void, reject, timeoutId });
+      this.pendingRequests.set(id, {
+        resolve: resolve as (v: unknown) => void,
+        reject,
+        timeoutId,
+        createdAt: Date.now(),
+      });
       this.send(request);
     });
   }
diff --git a/src/rpc/types.ts b/src/rpc/types.ts
index 9d2832d..f356828 100644
--- a/src/rpc/types.ts
+++ b/src/rpc/types.ts
@@ -43,4 +43,5 @@ export interface PendingRequest {
   resolve: (value: unknown) => void;
   reject: (error: Error) => void;
   timeoutId?: ReturnType<typeof setTimeout>;
+  createdAt: number; // Timestamp for cleanup of stale requests
 }
diff --git a/src/skills/parser.ts b/src/skills/parser.ts
index 594b2c4..404a601 100644
--- a/src/skills/parser.ts
+++ b/src/skills/parser.ts
@@ -176,6 +176,10 @@ function resolveFileVariable(path: string, errors: string[]): string {
 /**
  * Execute a command from a skill template.
  * Commands in skill files are user-defined and trusted.
+ *
+ * SECURITY NOTE: This intentionally executes shell commands from skill files.
+ * Skills are user-authored and considered trusted content.
+ * Only use skills from sources you trust.
  */
 function resolveCommandVariable(command: string, errors: string[]): string {
   if (!command) {
@@ -185,14 +189,28 @@ function resolveCommandVariable(command: string, errors: string[]): string {
 
   try {
     // User-defined command from skill file - intentionally executes shell commands
+    // This is safe because skills are user-authored trusted content
     const output = execSync(command, {
       encoding: 'utf-8',
       timeout: 10000,
       stdio: ['pipe', 'pipe', 'pipe'],
     });
     return output.trim();
-  } catch {
-    errors.push(`Command failed: ${command}`);
+  } catch (error) {
+    // Capture actual error details for debugging
+    let errorDetail = 'Unknown error';
+    if (error instanceof Error) {
+      errorDetail = error.message;
+      // For exec errors, stderr is often in the error object
+      const execError = error as Error & { stderr?: string; status?: number };
+      if (execError.stderr) {
+        errorDetail = execError.stderr.trim() || errorDetail;
+      }
+      if (execError.status !== undefined) {
+        errorDetail += ` (exit code: ${execError.status})`;
+      }
+    }
+    errors.push(`Command failed: ${command} - ${errorDetail}`);
     return `[Command failed: ${command}]`;
   }
 }