405 changes: 0 additions & 405 deletions apps/vscode-e2e/README.md

This file was deleted.

116 changes: 16 additions & 100 deletions apps/vscode-e2e/src/suite/index.ts
@@ -7,18 +7,6 @@ import type { RooCodeAPI } from "@roo-code/types"

import { waitFor } from "./utils"

/**
* Models to test against - high-performing models from different providers
*/
const MODELS_TO_TEST = ["openai/gpt-5.2", "anthropic/claude-sonnet-4.5", "google/gemini-3-pro-preview"]

interface ModelTestResult {
model: string
failures: number
passes: number
duration: number
}

export async function run() {
const extension = vscode.extensions.getExtension<RooCodeAPI>("RooVeterinaryInc.roo-cline")

@@ -28,18 +16,28 @@ export async function run() {

const api = extension.isActive ? extension.exports : await extension.activate()

// Initial configuration with first model (will be reconfigured per model)
await api.setConfiguration({
apiProvider: "openrouter" as const,
openRouterApiKey: process.env.OPENROUTER_API_KEY!,
openRouterModelId: MODELS_TO_TEST[0],
openRouterModelId: "openai/gpt-4.1",
})

await vscode.commands.executeCommand("roo-cline.SidebarProvider.focus")
await waitFor(() => api.isReady())

globalThis.api = api

const mochaOptions: Mocha.MochaOptions = {
ui: "tdd",
timeout: 20 * 60 * 1_000, // 20m
}

if (process.env.TEST_GREP) {
mochaOptions.grep = process.env.TEST_GREP
console.log(`Running tests matching pattern: ${process.env.TEST_GREP}`)
}

const mocha = new Mocha(mochaOptions)
const cwd = path.resolve(__dirname, "..")

let testFiles: string[]
@@ -59,91 +57,9 @@ export async function run() {
throw new Error(`No test files found matching criteria: ${process.env.TEST_FILE || "all tests"}`)
}

const results: ModelTestResult[] = []
let totalFailures = 0

// Run tests for each model sequentially
for (const model of MODELS_TO_TEST) {
console.log(`\n${"=".repeat(60)}`)
console.log(` TESTING WITH MODEL: ${model}`)
console.log(`${"=".repeat(60)}\n`)

// Reconfigure API for this model
await api.setConfiguration({
apiProvider: "openrouter" as const,
openRouterApiKey: process.env.OPENROUTER_API_KEY!,
openRouterModelId: model,
})

// Wait for API to be ready with new configuration
await waitFor(() => api.isReady())

const startTime = Date.now()

const mochaOptions: Mocha.MochaOptions = {
ui: "tdd",
timeout: 20 * 60 * 1_000, // 20m
}

if (process.env.TEST_GREP) {
mochaOptions.grep = process.env.TEST_GREP
console.log(`Running tests matching pattern: ${process.env.TEST_GREP}`)
}

const mocha = new Mocha(mochaOptions)

// Add test files fresh for each model run
testFiles.forEach((testFile) => mocha.addFile(path.resolve(cwd, testFile)))

// Run tests for this model
const modelResult = await new Promise<{ failures: number; passes: number }>((resolve) => {
const runner = mocha.run((failures) => {
resolve({
failures,
passes: runner.stats?.passes ?? 0,
})
})
})

const duration = Date.now() - startTime

results.push({
model,
failures: modelResult.failures,
passes: modelResult.passes,
duration,
})

totalFailures += modelResult.failures

console.log(
`\n[${model}] Completed: ${modelResult.passes} passed, ${modelResult.failures} failed (${(duration / 1000).toFixed(1)}s)\n`,
)

// Clear mocha's require cache to allow re-running tests
mocha.dispose()
testFiles.forEach((testFile) => {
const fullPath = path.resolve(cwd, testFile)
delete require.cache[require.resolve(fullPath)]
})
}

// Print summary
console.log(`\n${"=".repeat(60)}`)
console.log(` MULTI-MODEL TEST SUMMARY`)
console.log(`${"=".repeat(60)}`)

for (const result of results) {
const status = result.failures === 0 ? "✓ PASS" : "✗ FAIL"
console.log(` ${status} ${result.model}`)
console.log(
` ${result.passes} passed, ${result.failures} failed (${(result.duration / 1000).toFixed(1)}s)`,
)
}
testFiles.forEach((testFile) => mocha.addFile(path.resolve(cwd, testFile)))

console.log(`${"=".repeat(60)}\n`)

if (totalFailures > 0) {
throw new Error(`${totalFailures} total test failures across all models.`)
}
return new Promise<void>((resolve, reject) =>
mocha.run((failures) => (failures === 0 ? resolve() : reject(new Error(`${failures} tests failed.`)))),
)
}
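
Note: the simplified runner keeps its dependency on the waitFor helper imported from ./utils, whose implementation is not shown in this diff. A minimal polling sketch (the timeout and interval defaults here are assumptions, not the values actually used in ./utils) could look like:

// Hypothetical sketch only - the real waitFor lives in ./utils and is not part of this diff.
type WaitForOptions = { timeout?: number; interval?: number }

export const waitFor = async (
	condition: () => boolean | Promise<boolean>,
	{ timeout = 60_000, interval = 250 }: WaitForOptions = {},
): Promise<void> => {
	const start = Date.now()

	// Poll the condition until it returns true or the timeout elapses.
	while (!(await condition())) {
		if (Date.now() - start > timeout) {
			throw new Error(`waitFor: condition not met within ${timeout}ms`)
		}

		await new Promise((resolve) => setTimeout(resolve, interval))
	}
}

Since the runner only calls waitFor(() => api.isReady()), any condition-polling helper with roughly this shape would fit.
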
113 changes: 47 additions & 66 deletions apps/vscode-e2e/src/suite/subtasks.test.ts
@@ -2,92 +2,73 @@ import * as assert from "assert"

import { RooCodeEventName, type ClineMessage } from "@roo-code/types"

import { waitFor } from "./utils"
import { sleep, waitFor, waitUntilCompleted } from "./utils"

suite("Roo Code Subtasks", () => {
test("Should create and complete a subtask successfully", async function () {
this.timeout(180_000) // 3 minutes for complex orchestration
suite.skip("Roo Code Subtasks", () => {
test("Should handle subtask cancellation and resumption correctly", async () => {
const api = globalThis.api

const messages: ClineMessage[] = []
let childTaskCompleted = false
let parentCompleted = false
const messages: Record<string, ClineMessage[]> = {}

// Listen for messages to detect subtask result
const messageHandler = ({ message }: { message: ClineMessage }) => {
messages.push(message)

// Log completion messages
if (message.type === "say" && message.say === "completion_result") {
console.log("Completion result:", message.text?.substring(0, 100))
}
}
api.on(RooCodeEventName.Message, messageHandler)

// Listen for task completion
const completionHandler = (taskId: string) => {
if (taskId === parentTaskId) {
parentCompleted = true
console.log("✓ Parent task completed")
} else {
childTaskCompleted = true
console.log("✓ Child task completed:", taskId)
api.on(RooCodeEventName.Message, ({ taskId, message }) => {
if (message.type === "say" && message.partial === false) {
messages[taskId] = messages[taskId] || []
messages[taskId].push(message)
}
}
api.on(RooCodeEventName.TaskCompleted, completionHandler)
})

const childPrompt = "What is 2 + 2? Respond with just the number."
const childPrompt = "You are a calculator. Respond only with numbers. What is the square root of 9?"

// Start a parent task that will create a subtask
console.log("Starting parent task that will spawn subtask...")
// Start a parent task that will create a subtask.
const parentTaskId = await api.startNewTask({
configuration: {
mode: "code",
mode: "ask",
alwaysAllowModeSwitch: true,
alwaysAllowSubtasks: true,
autoApprovalEnabled: true,
enableCheckpoints: false,
},
text: `Create a subtask using the new_task tool with this message: "${childPrompt}". Wait for the subtask to complete, then tell me the result.`,
text:
"You are the parent task. " +
`Create a subtask by using the new_task tool with the message '${childPrompt}'.` +
"After creating the subtask, wait for it to complete and then respond 'Parent task resumed'.",
})

try {
// Wait for child task to complete
console.log("Waiting for child task to complete...")
await waitFor(() => childTaskCompleted, { timeout: 90_000 })
console.log("✓ Child task completed")
let spawnedTaskId: string | undefined = undefined

// Wait for parent to complete
console.log("Waiting for parent task to complete...")
await waitFor(() => parentCompleted, { timeout: 90_000 })
console.log("✓ Parent task completed")
// Wait for the subtask to be spawned and then cancel it.
api.on(RooCodeEventName.TaskSpawned, (_, childTaskId) => (spawnedTaskId = childTaskId))
await waitFor(() => !!spawnedTaskId)
await sleep(1_000) // Give the task a chance to start and populate the history.
await api.cancelCurrentTask()

// Verify the parent task mentions the subtask result (should contain "4")
const hasSubtaskResult = messages.some(
(m) =>
m.type === "say" &&
m.say === "completion_result" &&
m.text?.includes("4") &&
m.text?.toLowerCase().includes("subtask"),
)
// Wait a bit to ensure any task resumption would have happened.
await sleep(2_000)

// Verify all events occurred
assert.ok(childTaskCompleted, "Child task should have completed")
assert.ok(parentCompleted, "Parent task should have completed")
assert.ok(hasSubtaskResult, "Parent task should mention the subtask result")
// The parent task should not have resumed yet, so we shouldn't see
// "Parent task resumed".
assert.ok(
messages[parentTaskId]?.find(({ type, text }) => type === "say" && text === "Parent task resumed") ===
undefined,
"Parent task should not have resumed after subtask cancellation",
)

console.log("Test passed! Subtask orchestration working correctly")
} finally {
// Clean up
api.off(RooCodeEventName.Message, messageHandler)
api.off(RooCodeEventName.TaskCompleted, completionHandler)
// Start a new task with the same message as the subtask.
const anotherTaskId = await api.startNewTask({ text: childPrompt })
await waitUntilCompleted({ api, taskId: anotherTaskId })

// Cancel any remaining tasks
try {
await api.cancelCurrentTask()
} catch {
// Task might already be complete
}
}
// Wait a bit to ensure any task resumption would have happened.
await sleep(2_000)

// The parent task should still not have resumed.
assert.ok(
messages[parentTaskId]?.find(({ type, text }) => type === "say" && text === "Parent task resumed") ===
undefined,
"Parent task should not have resumed after subtask cancellation",
)

// Clean up - cancel all tasks.
await api.clearCurrentTask()
await waitUntilCompleted({ api, taskId: parentTaskId })
})
})
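
Note: the rewritten test additionally imports sleep and waitUntilCompleted from ./utils, neither of which appears in this diff. A rough sketch of what they might look like (the options-object shape, the polling interval, and the timeout default are assumptions) is:

// Hypothetical sketches only - the real helpers live in ./utils and are not part of this diff.
import { RooCodeEventName, type RooCodeAPI } from "@roo-code/types"

export const sleep = (ms: number): Promise<void> => new Promise((resolve) => setTimeout(resolve, ms))

export const waitUntilCompleted = async ({
	api,
	taskId,
	timeout = 5 * 60 * 1_000,
}: {
	api: RooCodeAPI
	taskId: string
	timeout?: number
}): Promise<void> => {
	let completed = false

	// Mirror the event wiring used in the test: TaskCompleted reports the finished task's id.
	const handler = (completedTaskId: string) => {
		if (completedTaskId === taskId) {
			completed = true
		}
	}

	api.on(RooCodeEventName.TaskCompleted, handler)

	try {
		const start = Date.now()

		while (!completed) {
			if (Date.now() - start > timeout) {
				throw new Error(`waitUntilCompleted: task ${taskId} did not complete within ${timeout}ms`)
			}

			await sleep(250)
		}
	} finally {
		api.off(RooCodeEventName.TaskCompleted, handler)
	}
}

The TaskCompleted wiring mirrors how the test's own handlers listen for events, so the sketch only assumes that the event reports the finished task's id.
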