405 changes: 0 additions & 405 deletions apps/vscode-e2e/README.md

This file was deleted.

116 changes: 16 additions & 100 deletions apps/vscode-e2e/src/suite/index.ts
@@ -7,18 +7,6 @@ import type { RooCodeAPI } from "@roo-code/types"

import { waitFor } from "./utils"

/**
* Models to test against - high-performing models from different providers
*/
const MODELS_TO_TEST = ["openai/gpt-5.2", "anthropic/claude-sonnet-4.5", "google/gemini-3-pro-preview"]

interface ModelTestResult {
model: string
failures: number
passes: number
duration: number
}

export async function run() {
const extension = vscode.extensions.getExtension<RooCodeAPI>("RooVeterinaryInc.roo-cline")

@@ -28,18 +16,28 @@ export async function run() {

const api = extension.isActive ? extension.exports : await extension.activate()

// Initial configuration with first model (will be reconfigured per model)
await api.setConfiguration({
apiProvider: "openrouter" as const,
openRouterApiKey: process.env.OPENROUTER_API_KEY!,
openRouterModelId: MODELS_TO_TEST[0],
openRouterModelId: "openai/gpt-4.1",
})

await vscode.commands.executeCommand("roo-cline.SidebarProvider.focus")
await waitFor(() => api.isReady())

globalThis.api = api

const mochaOptions: Mocha.MochaOptions = {
ui: "tdd",
timeout: 20 * 60 * 1_000, // 20m
}

if (process.env.TEST_GREP) {
mochaOptions.grep = process.env.TEST_GREP
console.log(`Running tests matching pattern: ${process.env.TEST_GREP}`)
}

const mocha = new Mocha(mochaOptions)
const cwd = path.resolve(__dirname, "..")

let testFiles: string[]
@@ -59,91 +57,9 @@ export async function run() {
throw new Error(`No test files found matching criteria: ${process.env.TEST_FILE || "all tests"}`)
}

const results: ModelTestResult[] = []
let totalFailures = 0

// Run tests for each model sequentially
for (const model of MODELS_TO_TEST) {
console.log(`\n${"=".repeat(60)}`)
console.log(` TESTING WITH MODEL: ${model}`)
console.log(`${"=".repeat(60)}\n`)

// Reconfigure API for this model
await api.setConfiguration({
apiProvider: "openrouter" as const,
openRouterApiKey: process.env.OPENROUTER_API_KEY!,
openRouterModelId: model,
})

// Wait for API to be ready with new configuration
await waitFor(() => api.isReady())

const startTime = Date.now()

const mochaOptions: Mocha.MochaOptions = {
ui: "tdd",
timeout: 20 * 60 * 1_000, // 20m
}

if (process.env.TEST_GREP) {
mochaOptions.grep = process.env.TEST_GREP
console.log(`Running tests matching pattern: ${process.env.TEST_GREP}`)
}

const mocha = new Mocha(mochaOptions)

// Add test files fresh for each model run
testFiles.forEach((testFile) => mocha.addFile(path.resolve(cwd, testFile)))

// Run tests for this model
const modelResult = await new Promise<{ failures: number; passes: number }>((resolve) => {
const runner = mocha.run((failures) => {
resolve({
failures,
passes: runner.stats?.passes ?? 0,
})
})
})

const duration = Date.now() - startTime

results.push({
model,
failures: modelResult.failures,
passes: modelResult.passes,
duration,
})

totalFailures += modelResult.failures

console.log(
`\n[${model}] Completed: ${modelResult.passes} passed, ${modelResult.failures} failed (${(duration / 1000).toFixed(1)}s)\n`,
)

// Clear mocha's require cache to allow re-running tests
mocha.dispose()
testFiles.forEach((testFile) => {
const fullPath = path.resolve(cwd, testFile)
delete require.cache[require.resolve(fullPath)]
})
}

// Print summary
console.log(`\n${"=".repeat(60)}`)
console.log(` MULTI-MODEL TEST SUMMARY`)
console.log(`${"=".repeat(60)}`)

for (const result of results) {
const status = result.failures === 0 ? "✓ PASS" : "✗ FAIL"
console.log(` ${status} ${result.model}`)
console.log(
` ${result.passes} passed, ${result.failures} failed (${(result.duration / 1000).toFixed(1)}s)`,
)
}
testFiles.forEach((testFile) => mocha.addFile(path.resolve(cwd, testFile)))

console.log(`${"=".repeat(60)}\n`)

if (totalFailures > 0) {
throw new Error(`${totalFailures} total test failures across all models.`)
}
return new Promise<void>((resolve, reject) =>
mocha.run((failures) => (failures === 0 ? resolve() : reject(new Error(`${failures} tests failed.`)))),
)
}
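
Note: the simplified runner keeps its dependency on the waitFor helper imported from ./utils, whose implementation is not shown in this diff. A minimal polling sketch (the timeout and interval defaults here are assumptions, not the values actually used in ./utils) could look like:

// Hypothetical sketch only - the real waitFor lives in ./utils and is not part of this diff.
type WaitForOptions = { timeout?: number; interval?: number }

export const waitFor = async (
	condition: () => boolean | Promise<boolean>,
	{ timeout = 60_000, interval = 250 }: WaitForOptions = {},
): Promise<void> => {
	const start = Date.now()

	// Poll the condition until it returns true or the timeout elapses.
	while (!(await condition())) {
		if (Date.now() - start > timeout) {
			throw new Error(`waitFor: condition not met within ${timeout}ms`)
		}

		await new Promise((resolve) => setTimeout(resolve, interval))
	}
}

Since the runner only calls waitFor(() => api.isReady()), any condition-polling helper with roughly this shape would fit.
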
113 changes: 47 additions & 66 deletions apps/vscode-e2e/src/suite/subtasks.test.ts
@@ -2,92 +2,73 @@ import * as assert from "assert"

import { RooCodeEventName, type ClineMessage } from "@roo-code/types"

import { waitFor } from "./utils"
import { sleep, waitFor, waitUntilCompleted } from "./utils"

suite("Roo Code Subtasks", () => {
test("Should create and complete a subtask successfully", async function () {
this.timeout(180_000) // 3 minutes for complex orchestration
suite.skip("Roo Code Subtasks", () => {
test("Should handle subtask cancellation and resumption correctly", async () => {
const api = globalThis.api

const messages: ClineMessage[] = []
let childTaskCompleted = false
let parentCompleted = false
const messages: Record<string, ClineMessage[]> = {}

// Listen for messages to detect subtask result
const messageHandler = ({ message }: { message: ClineMessage }) => {
messages.push(message)

// Log completion messages
if (message.type === "say" && message.say === "completion_result") {
console.log("Completion result:", message.text?.substring(0, 100))
}
}
api.on(RooCodeEventName.Message, messageHandler)

// Listen for task completion
const completionHandler = (taskId: string) => {
if (taskId === parentTaskId) {
parentCompleted = true
console.log("✓ Parent task completed")
} else {
childTaskCompleted = true
console.log("✓ Child task completed:", taskId)
api.on(RooCodeEventName.Message, ({ taskId, message }) => {
if (message.type === "say" && message.partial === false) {
messages[taskId] = messages[taskId] || []
messages[taskId].push(message)
}
}
api.on(RooCodeEventName.TaskCompleted, completionHandler)
})

const childPrompt = "What is 2 + 2? Respond with just the number."
const childPrompt = "You are a calculator. Respond only with numbers. What is the square root of 9?"

// Start a parent task that will create a subtask
console.log("Starting parent task that will spawn subtask...")
// Start a parent task that will create a subtask.
const parentTaskId = await api.startNewTask({
configuration: {
mode: "code",
mode: "ask",
alwaysAllowModeSwitch: true,
alwaysAllowSubtasks: true,
autoApprovalEnabled: true,
enableCheckpoints: false,
},
text: `Create a subtask using the new_task tool with this message: "${childPrompt}". Wait for the subtask to complete, then tell me the result.`,
text:
"You are the parent task. " +
`Create a subtask by using the new_task tool with the message '${childPrompt}'.` +
"After creating the subtask, wait for it to complete and then respond 'Parent task resumed'.",
})

try {
// Wait for child task to complete
console.log("Waiting for child task to complete...")
await waitFor(() => childTaskCompleted, { timeout: 90_000 })
console.log("✓ Child task completed")
let spawnedTaskId: string | undefined = undefined

// Wait for parent to complete
console.log("Waiting for parent task to complete...")
await waitFor(() => parentCompleted, { timeout: 90_000 })
console.log("✓ Parent task completed")
// Wait for the subtask to be spawned and then cancel it.
api.on(RooCodeEventName.TaskSpawned, (_, childTaskId) => (spawnedTaskId = childTaskId))
await waitFor(() => !!spawnedTaskId)
await sleep(1_000) // Give the task a chance to start and populate the history.
await api.cancelCurrentTask()

// Verify the parent task mentions the subtask result (should contain "4")
const hasSubtaskResult = messages.some(
(m) =>
m.type === "say" &&
m.say === "completion_result" &&
m.text?.includes("4") &&
m.text?.toLowerCase().includes("subtask"),
)
// Wait a bit to ensure any task resumption would have happened.
await sleep(2_000)

// Verify all events occurred
assert.ok(childTaskCompleted, "Child task should have completed")
assert.ok(parentCompleted, "Parent task should have completed")
assert.ok(hasSubtaskResult, "Parent task should mention the subtask result")
// The parent task should not have resumed yet, so we shouldn't see
// "Parent task resumed".
assert.ok(
messages[parentTaskId]?.find(({ type, text }) => type === "say" && text === "Parent task resumed") ===
undefined,
"Parent task should not have resumed after subtask cancellation",
)

console.log("Test passed! Subtask orchestration working correctly")
} finally {
// Clean up
api.off(RooCodeEventName.Message, messageHandler)
api.off(RooCodeEventName.TaskCompleted, completionHandler)
// Start a new task with the same message as the subtask.
const anotherTaskId = await api.startNewTask({ text: childPrompt })
await waitUntilCompleted({ api, taskId: anotherTaskId })

// Cancel any remaining tasks
try {
await api.cancelCurrentTask()
} catch {
// Task might already be complete
}
}
// Wait a bit to ensure any task resumption would have happened.
await sleep(2_000)

// The parent task should still not have resumed.
assert.ok(
messages[parentTaskId]?.find(({ type, text }) => type === "say" && text === "Parent task resumed") ===
undefined,
"Parent task should not have resumed after subtask cancellation",
)

// Clean up - cancel all tasks.
await api.clearCurrentTask()
await waitUntilCompleted({ api, taskId: parentTaskId })
})
})
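
Note: the rewritten test additionally imports sleep and waitUntilCompleted from ./utils, neither of which appears in this diff. A rough sketch of what they might look like (the options-object shape, the polling interval, and the timeout default are assumptions) is:

// Hypothetical sketches only - the real helpers live in ./utils and are not part of this diff.
import { RooCodeEventName, type RooCodeAPI } from "@roo-code/types"

export const sleep = (ms: number): Promise<void> => new Promise((resolve) => setTimeout(resolve, ms))

export const waitUntilCompleted = async ({
	api,
	taskId,
	timeout = 5 * 60 * 1_000,
}: {
	api: RooCodeAPI
	taskId: string
	timeout?: number
}): Promise<void> => {
	let completed = false

	// Mirror the event wiring used in the test: TaskCompleted reports the finished task's id.
	const handler = (completedTaskId: string) => {
		if (completedTaskId === taskId) {
			completed = true
		}
	}

	api.on(RooCodeEventName.TaskCompleted, handler)

	try {
		const start = Date.now()

		while (!completed) {
			if (Date.now() - start > timeout) {
				throw new Error(`waitUntilCompleted: task ${taskId} did not complete within ${timeout}ms`)
			}

			await sleep(250)
		}
	} finally {
		api.off(RooCodeEventName.TaskCompleted, handler)
	}
}

The TaskCompleted wiring mirrors how the test's own handlers listen for events, so the sketch only assumes that the event reports the finished task's id.
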