From 4ed56aff66751ed8a84846cf55a1d042c5cb5f48 Mon Sep 17 00:00:00 2001 From: bkellam Date: Wed, 28 Jan 2026 15:08:27 -0800 Subject: [PATCH 01/13] wip --- packages/mcp/src/client.ts | 26 +- packages/mcp/src/index.ts | 52 +++- packages/mcp/src/schemas.ts | 32 +++ packages/mcp/src/types.ts | 5 + .../app/api/(server)/chat/blocking/route.ts | 257 ++++++++++++++++++ .../web/src/app/api/(server)/chat/route.ts | 10 +- packages/web/src/features/chat/agent.ts | 163 ++++++++++- 7 files changed, 533 insertions(+), 12 deletions(-) create mode 100644 packages/web/src/app/api/(server)/chat/blocking/route.ts diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index a98ab136..70961ec9 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -1,6 +1,6 @@ import { env } from './env.js'; -import { listReposResponseSchema, searchResponseSchema, fileSourceResponseSchema, listCommitsResponseSchema } from './schemas.js'; -import { FileSourceRequest, ListReposQueryParams, SearchRequest, ListCommitsQueryParamsSchema } from './types.js'; +import { listReposResponseSchema, searchResponseSchema, fileSourceResponseSchema, listCommitsResponseSchema, askCodebaseResponseSchema } from './schemas.js'; +import { AskCodebaseRequest, AskCodebaseResponse, FileSourceRequest, ListReposQueryParams, SearchRequest, ListCommitsQueryParamsSchema } from './types.js'; import { isServiceError, ServiceErrorException } from './utils.js'; import { z } from 'zod'; @@ -106,4 +106,24 @@ export const listCommits = async (queryParams: ListCommitsQueryParamsSchema) => const commits = await parseResponse(response, listCommitsResponseSchema); const totalCount = parseInt(response.headers.get('X-Total-Count') ?? '0', 10); return { commits, totalCount }; -} \ No newline at end of file +} + +/** + * Asks a natural language question about the codebase using the Sourcebot AI agent. + * This is a blocking call that runs the full agent loop and returns when complete. 
+ * + * @param request - The question and optional repo filters + * @returns The agent's answer, chat URL, sources, and metadata + */ +export const askCodebase = async (request: AskCodebaseRequest): Promise => { + const response = await fetch(`${env.SOURCEBOT_HOST}/api/chat/blocking`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + ...(env.SOURCEBOT_API_KEY ? { 'X-Sourcebot-Api-Key': env.SOURCEBOT_API_KEY } : {}) + }, + body: JSON.stringify(request), + }); + + return parseResponse(response, askCodebaseResponseSchema); +} diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 2ab02fd6..d44b58e8 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -6,10 +6,10 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js' import _dedent from "dedent"; import escapeStringRegexp from 'escape-string-regexp'; import { z } from 'zod'; -import { getFileSource, listCommits, listRepos, search } from './client.js'; +import { askCodebase, getFileSource, listCommits, listRepos, search } from './client.js'; import { env, numberSchema } from './env.js'; -import { fileSourceRequestSchema, listCommitsQueryParamsSchema, listReposQueryParamsSchema } from './schemas.js'; -import { FileSourceRequest, ListCommitsQueryParamsSchema, ListReposQueryParams, TextContent } from './types.js'; +import { askCodebaseRequestSchema, fileSourceRequestSchema, listCommitsQueryParamsSchema, listReposQueryParamsSchema } from './schemas.js'; +import { AskCodebaseRequest, FileSourceRequest, ListCommitsQueryParamsSchema, ListReposQueryParams, TextContent } from './types.js'; const dedent = _dedent.withOptions({ alignValues: true }); @@ -238,7 +238,53 @@ server.tool( } ); +server.tool( + "ask_codebase", + dedent` + Ask a natural language question about the codebase. This tool uses an AI agent to autonomously search code, read files, and find symbol references/definitions to answer your question. 
+ + The agent will: + - Analyze your question and determine what context it needs + - Search the codebase using multiple strategies (code search, symbol lookup, file reading) + - Synthesize findings into a comprehensive answer with code references + + Returns a detailed answer in markdown format with code references, plus a link to view the full research session (including all tool calls and reasoning) in the Sourcebot web UI. + + This is a blocking operation that may take 30-60+ seconds for complex questions as the agent researches the codebase. + `, + { + question: z.string().describe("The question to ask about the codebase."), + repo: z.string().describe("The repository to ask the question on."), + }, + async ({ + question, + repo, + }) => { + const response = await askCodebase({ + question, + repos: [repo], + }); + // Format the response with the answer and a link to the chat + const formattedResponse = dedent` + ${response.answer} + + --- + **View full research session:** ${response.chatUrl} + + **Sources referenced:** ${response.sources.length} files + **Response time:** ${(response.metadata.totalResponseTimeMs / 1000).toFixed(1)}s + **Model:** ${response.metadata.modelName} + `; + + return { + content: [{ + type: "text", + text: formattedResponse, + }], + }; + } +); const runServer = async () => { const transport = new StdioServerTransport(); diff --git a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index a70e18fa..39ac069b 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -272,3 +272,35 @@ export const listCommitsResponseSchema = z.array(z.object({ author_name: z.string(), author_email: z.string(), })); + +// ============================================================================ +// Ask Codebase (Blocking Chat API) +// ============================================================================ + +export const askCodebaseRequestSchema = z.object({ + question: z.string().describe("The question to ask about the 
codebase"), + repos: z.array(z.string()).optional().describe("Optional: filter to specific repositories by name"), +}); + +export const sourceSchema = z.object({ + type: z.literal('file'), + repo: z.string(), + path: z.string(), + name: z.string(), + language: z.string(), + revision: z.string(), +}); + +export const askCodebaseResponseSchema = z.object({ + answer: z.string().describe("The agent's final answer in markdown format"), + chatId: z.string().describe("ID of the persisted chat session"), + chatUrl: z.string().describe("URL to view the chat in the web UI"), + sources: z.array(sourceSchema).describe("Files the agent referenced during research"), + metadata: z.object({ + totalTokens: z.number(), + inputTokens: z.number(), + outputTokens: z.number(), + totalResponseTimeMs: z.number(), + modelName: z.string(), + }).describe("Metadata about the response"), +}); diff --git a/packages/mcp/src/types.ts b/packages/mcp/src/types.ts index cd64cb08..8a721970 100644 --- a/packages/mcp/src/types.ts +++ b/packages/mcp/src/types.ts @@ -12,6 +12,8 @@ import { serviceErrorSchema, listCommitsQueryParamsSchema, listCommitsResponseSchema, + askCodebaseRequestSchema, + askCodebaseResponseSchema, } from "./schemas.js"; import { z } from "zod"; @@ -34,3 +36,6 @@ export type ServiceError = z.infer; export type ListCommitsQueryParamsSchema = z.infer; export type ListCommitsResponse = z.infer; + +export type AskCodebaseRequest = z.infer; +export type AskCodebaseResponse = z.infer; diff --git a/packages/web/src/app/api/(server)/chat/blocking/route.ts b/packages/web/src/app/api/(server)/chat/blocking/route.ts new file mode 100644 index 00000000..fd6a0a61 --- /dev/null +++ b/packages/web/src/app/api/(server)/chat/blocking/route.ts @@ -0,0 +1,257 @@ +import { sew } from "@/actions"; +import { _getConfiguredLanguageModelsFull, _getAISDKLanguageModelAndOptions, updateChatMessages } from "@/features/chat/actions"; +import { runAgentBlocking } from "@/features/chat/agent"; +import { 
ANSWER_TAG } from "@/features/chat/constants"; +import { LanguageModelInfo, SBChatMessage, Source } from "@/features/chat/types"; +import { convertLLMOutputToPortableMarkdown, getLanguageModelKey } from "@/features/chat/utils"; +import { ErrorCode } from "@/lib/errorCodes"; +import { requestBodySchemaValidationError, ServiceError, serviceErrorResponse } from "@/lib/serviceError"; +import { isServiceError } from "@/lib/utils"; +import { getBaseUrl } from "@/lib/utils.server"; +import { withOptionalAuthV2 } from "@/withAuthV2"; +import { ChatVisibility, Prisma } from "@sourcebot/db"; +import { createLogger } from "@sourcebot/shared"; +import { randomUUID } from "crypto"; +import { StatusCodes } from "http-status-codes"; +import { headers } from "next/headers"; +import { NextResponse } from "next/server"; +import { z } from "zod"; + +const logger = createLogger('chat-blocking-api'); + +/** + * Request schema for the blocking chat API. + * This is a simpler interface designed for MCP and other programmatic integrations. + */ +const blockingChatRequestSchema = z.object({ + // The question to ask about the codebase + question: z.string().min(1, "Question is required"), + // Optional: filter to specific repositories (by name) + repos: z.array(z.string()).optional(), + // Optional: specify a language model (defaults to first configured model) + languageModel: z.object({ + provider: z.string(), + model: z.string(), + displayName: z.string().optional(), + }).optional(), +}); + +/** + * Response schema for the blocking chat API. 
+ */ +interface BlockingChatResponse { + // The agent's final answer (markdown format) + answer: string; + // ID of the persisted chat session + chatId: string; + // URL to view the chat in the web UI + chatUrl: string; + // Files the agent referenced during research + sources: Source[]; + // Metadata about the response + metadata: { + totalTokens: number; + inputTokens: number; + outputTokens: number; + totalResponseTimeMs: number; + modelName: string; + }; +} + +/** + * POST /api/chat/blocking + * + * A blocking (non-streaming) chat endpoint designed for MCP and other integrations. + * Creates a chat session, runs the agent to completion, and returns the final answer. + * + * The chat session is persisted to the database, allowing users to view the full + * conversation (including tool calls and reasoning) in the web UI. + */ +export async function POST(request: Request) { + const requestBody = await request.json(); + const parsed = await blockingChatRequestSchema.safeParseAsync(requestBody); + + if (!parsed.success) { + return serviceErrorResponse(requestBodySchemaValidationError(parsed.error)); + } + + const { question, repos, languageModel: requestedLanguageModel } = parsed.data; + + const response: BlockingChatResponse | ServiceError = await sew(() => + withOptionalAuthV2(async ({ org, user, prisma }) => { + // Get all configured language models + const configuredModels = await _getConfiguredLanguageModelsFull(); + if (configuredModels.length === 0) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: "No language models are configured. 
Please configure at least one language model.", + } satisfies ServiceError; + } + + // Select the language model to use + let languageModelConfig = configuredModels[0]; // Default to first configured model + + if (requestedLanguageModel) { + const requested = requestedLanguageModel as LanguageModelInfo; + const found = configuredModels.find( + (model) => getLanguageModelKey(model) === getLanguageModelKey(requested) + ); + if (!found) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: `Language model ${requested.model} is not configured.`, + } satisfies ServiceError; + } + languageModelConfig = found; + } + + + const { model, providerOptions } = await _getAISDKLanguageModelAndOptions(languageModelConfig); + const modelName = languageModelConfig.displayName ?? languageModelConfig.model; + + // Determine which repos to search + let searchScopeRepoNames: string[]; + + if (repos && repos.length > 0) { + // Use the provided repos filter + // Validate that these repos exist and the user has access + const validRepos = await prisma.repo.findMany({ + where: { + orgId: org.id, + name: { + in: repos, + }, + }, + select: { name: true }, + }); + + searchScopeRepoNames = validRepos.map(r => r.name); + + if (searchScopeRepoNames.length === 0) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: "None of the specified repositories were found or accessible.", + } satisfies ServiceError; + } + } else { + // Search all repos the user has access to + const allRepos = await prisma.repo.findMany({ + where: { + orgId: org.id, + }, + select: { name: true }, + }); + searchScopeRepoNames = allRepos.map(r => r.name); + } + + // Create a new chat session + const chat = await prisma.chat.create({ + data: { + orgId: org.id, + createdById: user?.id, + visibility: ChatVisibility.PRIVATE, + messages: [] as unknown as Prisma.InputJsonValue, + }, + }); + + const traceId = randomUUID(); + + 
// Run the agent to completion + logger.info(`Starting blocking agent for chat ${chat.id}`, { + chatId: chat.id, + question: question.substring(0, 100), + repoCount: searchScopeRepoNames.length, + model: modelName, + }); + + const agentResult = await runAgentBlocking({ + model, + providerOptions, + searchScopeRepoNames, + inputMessages: [{ role: 'user', content: question }], + inputSources: [], + traceId, + }); + + // Extract the answer (removing the answer tag if present) + let answer = agentResult.text; + if (answer.startsWith(ANSWER_TAG)) { + answer = answer.slice(ANSWER_TAG.length).trim(); + } + + // Convert to portable markdown (replaces @file: references with markdown links) + const portableAnswer = convertLLMOutputToPortableMarkdown(answer); + + // Build the chat URL + const headersList = await headers(); + const baseUrl = getBaseUrl(headersList); + const chatUrl = `${baseUrl}/${org.domain}/chat/${chat.id}`; + + // Create the message history for persistence + const userMessage: SBChatMessage = { + id: randomUUID(), + role: 'user', + parts: [{ type: 'text', text: question }], + }; + + const assistantMessage: SBChatMessage = { + id: randomUUID(), + role: 'assistant', + parts: [ + { type: 'text', text: agentResult.text }, + // Include sources as data parts + ...agentResult.sources.map((source) => ({ + type: 'data-source' as const, + data: source, + })), + ], + metadata: { + totalTokens: agentResult.usage.totalTokens, + totalInputTokens: agentResult.usage.inputTokens, + totalOutputTokens: agentResult.usage.outputTokens, + totalResponseTimeMs: agentResult.responseTimeMs, + modelName, + traceId, + }, + }; + + // Persist the messages to the chat + await updateChatMessages({ + chatId: chat.id, + messages: [userMessage, assistantMessage], + }); + + logger.info(`Completed blocking agent for chat ${chat.id}`, { + chatId: chat.id, + responseTimeMs: agentResult.responseTimeMs, + totalTokens: agentResult.usage.totalTokens, + sourceCount: agentResult.sources.length, + }); 
+ + return { + answer: portableAnswer, + chatId: chat.id, + chatUrl, + sources: agentResult.sources, + metadata: { + totalTokens: agentResult.usage.totalTokens, + inputTokens: agentResult.usage.inputTokens, + outputTokens: agentResult.usage.outputTokens, + totalResponseTimeMs: agentResult.responseTimeMs, + modelName, + }, + } satisfies BlockingChatResponse; + }) + ); + + if (isServiceError(response)) { + return serviceErrorResponse(response); + } + + console.log(response); + + return NextResponse.json(response); +} diff --git a/packages/web/src/app/api/(server)/chat/route.ts b/packages/web/src/app/api/(server)/chat/route.ts index db16ea38..1594084d 100644 --- a/packages/web/src/app/api/(server)/chat/route.ts +++ b/packages/web/src/app/api/(server)/chat/route.ts @@ -5,7 +5,7 @@ import { additionalChatRequestParamsSchema, LanguageModelInfo, SBChatMessage, Se import { getAnswerPartFromAssistantMessage, getLanguageModelKey } from "@/features/chat/utils"; import { apiHandler } from "@/lib/apiHandler"; import { ErrorCode } from "@/lib/errorCodes"; -import { notFound, requestBodySchemaValidationError, serviceErrorResponse } from "@/lib/serviceError"; +import { notFound, requestBodySchemaValidationError, ServiceError, serviceErrorResponse } from "@/lib/serviceError"; import { isServiceError } from "@/lib/utils"; import { withOptionalAuthV2 } from "@/withAuthV2"; import { LanguageModelV2 as AISDKLanguageModelV2 } from "@ai-sdk/provider"; @@ -63,11 +63,11 @@ export const POST = apiHandler(async (req: NextRequest) => { } if (chat.isReadonly) { - return serviceErrorResponse({ + return { statusCode: StatusCodes.BAD_REQUEST, errorCode: ErrorCode.INVALID_REQUEST_BODY, message: "Chat is readonly and cannot be edited.", - }); + } satisfies ServiceError; } // From the language model ID, attempt to find the @@ -77,11 +77,11 @@ export const POST = apiHandler(async (req: NextRequest) => { .find((model) => getLanguageModelKey(model) === getLanguageModelKey(languageModel)); if 
(!languageModelConfig) { - return serviceErrorResponse({ + return { statusCode: StatusCodes.BAD_REQUEST, errorCode: ErrorCode.INVALID_REQUEST_BODY, message: `Language model ${languageModel.model} is not configured.`, - }); + } satisfies ServiceError; } const { model, providerOptions } = await _getAISDKLanguageModelAndOptions(languageModelConfig); diff --git a/packages/web/src/features/chat/agent.ts b/packages/web/src/features/chat/agent.ts index bb793b9e..75314aa2 100644 --- a/packages/web/src/features/chat/agent.ts +++ b/packages/web/src/features/chat/agent.ts @@ -4,7 +4,7 @@ import { getFileSource } from '@/features/git'; import { isServiceError } from "@/lib/utils"; import { ProviderOptions } from "@ai-sdk/provider-utils"; import { createLogger } from "@sourcebot/shared"; -import { LanguageModel, ModelMessage, StopCondition, streamText } from "ai"; +import { generateText, LanguageModel, ModelMessage, StopCondition, streamText } from "ai"; import { ANSWER_TAG, FILE_REFERENCE_PREFIX, toolNames } from "./constants"; import { createCodeSearchTool, findSymbolDefinitionsTool, findSymbolReferencesTool, readFilesTool, searchReposTool, listAllReposTool } from "./tools"; import { FileSource, Source } from "./types"; @@ -266,4 +266,165 @@ const resolveFileSource = async ({ path, repo, revision }: FileSource) => { language: fileSource.language, revision, } +} + +// ============================================================================ +// Blocking Agent Execution (for MCP and other non-streaming use cases) +// ============================================================================ + +interface BlockingAgentOptions { + model: LanguageModel; + providerOptions?: ProviderOptions; + searchScopeRepoNames: string[]; + inputMessages: ModelMessage[]; + inputSources: Source[]; + traceId: string; +} + +export interface BlockingAgentResult { + text: string; + sources: Source[]; + usage: { + inputTokens: number; + outputTokens: number; + totalTokens: number; + }; + 
responseTimeMs: number; +} + +/** + * Runs the chat agent in blocking mode, waiting for the complete response. + * This is used by the MCP server and other integrations that don't support streaming. + */ +export const runAgentBlocking = async ({ + model, + providerOptions, + inputMessages, + inputSources, + searchScopeRepoNames, + traceId, +}: BlockingAgentOptions): Promise => { + const startTime = Date.now(); + const collectedSources: Source[] = []; + + const onWriteSource = (source: Source) => { + // Deduplicate sources by checking if we already have this file + const exists = collectedSources.some( + (s) => s.type === source.type && + s.type === 'file' && source.type === 'file' && + s.repo === source.repo && + s.path === source.path + ); + if (!exists) { + collectedSources.push(source); + } + }; + + const baseSystemPrompt = createBaseSystemPrompt({ searchScopeRepoNames }); + + // Resolve any input file sources for the first step + let systemPromptWithSources = baseSystemPrompt; + if (inputSources.length > 0) { + const fileSources = inputSources.filter((source) => source.type === 'file'); + const resolvedFileSources = ( + await Promise.all(fileSources.map(resolveFileSource)) + ).filter((source) => source !== undefined); + + if (resolvedFileSources.length > 0) { + const fileSourcesSystemPrompt = await createFileSourcesSystemPrompt({ + files: resolvedFileSources + }); + systemPromptWithSources = `${baseSystemPrompt}\n\n${fileSourcesSystemPrompt}`; + } + } + + const result = await generateText({ + model, + providerOptions, + system: systemPromptWithSources, + messages: inputMessages, + tools: { + [toolNames.searchCode]: createCodeSearchTool(searchScopeRepoNames), + [toolNames.readFiles]: readFilesTool, + [toolNames.findSymbolReferences]: findSymbolReferencesTool, + [toolNames.findSymbolDefinitions]: findSymbolDefinitionsTool, + [toolNames.searchRepos]: searchReposTool, + [toolNames.listAllRepos]: listAllReposTool, + }, + temperature: 
env.SOURCEBOT_CHAT_MODEL_TEMPERATURE, + stopWhen: [ + stepCountIsGTE(env.SOURCEBOT_CHAT_MAX_STEP_COUNT), + ], + toolChoice: "auto", + onStepFinish: ({ toolResults }) => { + // Extract sources from tool results (same logic as streaming version) + toolResults.forEach(({ toolName, output, dynamic }) => { + // We don't care about dynamic tool results here. + if (dynamic) { + return; + } + + if (isServiceError(output)) { + return; + } + + if (toolName === toolNames.readFiles) { + (output as { path: string; repository: string; language: string; revision: string }[]).forEach((file) => { + onWriteSource({ + type: 'file', + language: file.language, + repo: file.repository, + path: file.path, + revision: file.revision, + name: file.path.split('/').pop() ?? file.path, + }); + }); + } + else if (toolName === toolNames.searchCode) { + const searchOutput = output as { files: { language: string; repository: string; fileName: string; revision: string }[] }; + searchOutput.files.forEach((file) => { + onWriteSource({ + type: 'file', + language: file.language, + repo: file.repository, + path: file.fileName, + revision: file.revision, + name: file.fileName.split('/').pop() ?? file.fileName, + }); + }); + } + else if (toolName === toolNames.findSymbolDefinitions || toolName === toolNames.findSymbolReferences) { + (output as { language: string; repository: string; fileName: string; revision: string }[]).forEach((file) => { + onWriteSource({ + type: 'file', + language: file.language, + repo: file.repository, + path: file.fileName, + revision: file.revision, + name: file.fileName.split('/').pop() ?? file.fileName, + }); + }); + } + }); + }, + experimental_telemetry: { + isEnabled: clientEnv.NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT !== undefined, + metadata: { + langfuseTraceId: traceId, + }, + }, + }); + + const responseTimeMs = Date.now() - startTime; + + return { + text: result.text, + sources: collectedSources, + usage: { + inputTokens: result.totalUsage.inputTokens ?? 
0, + outputTokens: result.totalUsage.outputTokens ?? 0, + totalTokens: result.totalUsage.totalTokens ?? 0, + }, + responseTimeMs, + }; } \ No newline at end of file From ee6a423537bcabd717bed48dd028800fe8f2aedb Mon Sep 17 00:00:00 2001 From: bkellam Date: Sat, 31 Jan 2026 17:44:41 -0800 Subject: [PATCH 02/13] wip --- packages/mcp/src/index.ts | 7 +- packages/mcp/src/schemas.ts | 5 - packages/web/package.json | 1 + .../app/api/(server)/chat/blocking/route.ts | 206 +++----- .../web/src/app/api/(server)/chat/route.ts | 31 +- packages/web/src/features/chat/agent.ts | 460 +++++------------- yarn.lock | 1 + 7 files changed, 212 insertions(+), 499 deletions(-) diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index d44b58e8..b8a9f781 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -8,8 +8,8 @@ import escapeStringRegexp from 'escape-string-regexp'; import { z } from 'zod'; import { askCodebase, getFileSource, listCommits, listRepos, search } from './client.js'; import { env, numberSchema } from './env.js'; -import { askCodebaseRequestSchema, fileSourceRequestSchema, listCommitsQueryParamsSchema, listReposQueryParamsSchema } from './schemas.js'; -import { AskCodebaseRequest, FileSourceRequest, ListCommitsQueryParamsSchema, ListReposQueryParams, TextContent } from './types.js'; +import { fileSourceRequestSchema, listCommitsQueryParamsSchema, listReposQueryParamsSchema } from './schemas.js'; +import { FileSourceRequest, ListCommitsQueryParamsSchema, ListReposQueryParams, TextContent } from './types.js'; const dedent = _dedent.withOptions({ alignValues: true }); @@ -254,15 +254,12 @@ server.tool( `, { question: z.string().describe("The question to ask about the codebase."), - repo: z.string().describe("The repository to ask the question on."), }, async ({ question, - repo, }) => { const response = await askCodebase({ question, - repos: [repo], }); // Format the response with the answer and a link to the chat diff --git 
a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index 39ac069b..deb269b8 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -273,13 +273,8 @@ export const listCommitsResponseSchema = z.array(z.object({ author_email: z.string(), })); -// ============================================================================ -// Ask Codebase (Blocking Chat API) -// ============================================================================ - export const askCodebaseRequestSchema = z.object({ question: z.string().describe("The question to ask about the codebase"), - repos: z.array(z.string()).optional().describe("Optional: filter to specific repositories by name"), }); export const sourceSchema = z.object({ diff --git a/packages/web/package.json b/packages/web/package.json index a0c202d8..1b198ba2 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -134,6 +134,7 @@ "codemirror-lang-spreadsheet": "^1.3.0", "codemirror-lang-zig": "^0.1.0", "date-fns": "^4.1.0", + "dedent": "^1.7.1", "embla-carousel-auto-scroll": "^8.3.0", "embla-carousel-react": "^8.3.0", "escape-string-regexp": "^5.0.0", diff --git a/packages/web/src/app/api/(server)/chat/blocking/route.ts b/packages/web/src/app/api/(server)/chat/blocking/route.ts index fd6a0a61..8fba3dfd 100644 --- a/packages/web/src/app/api/(server)/chat/blocking/route.ts +++ b/packages/web/src/app/api/(server)/chat/blocking/route.ts @@ -1,9 +1,7 @@ import { sew } from "@/actions"; -import { _getConfiguredLanguageModelsFull, _getAISDKLanguageModelAndOptions, updateChatMessages } from "@/features/chat/actions"; -import { runAgentBlocking } from "@/features/chat/agent"; -import { ANSWER_TAG } from "@/features/chat/constants"; -import { LanguageModelInfo, SBChatMessage, Source } from "@/features/chat/types"; -import { convertLLMOutputToPortableMarkdown, getLanguageModelKey } from "@/features/chat/utils"; +import { _getConfiguredLanguageModelsFull, _getAISDKLanguageModelAndOptions, 
updateChatMessages, generateAndUpdateChatNameFromMessage } from "@/features/chat/actions"; +import { SBChatMessage, Source } from "@/features/chat/types"; +import { convertLLMOutputToPortableMarkdown, getAnswerPartFromAssistantMessage } from "@/features/chat/utils"; import { ErrorCode } from "@/lib/errorCodes"; import { requestBodySchemaValidationError, ServiceError, serviceErrorResponse } from "@/lib/serviceError"; import { isServiceError } from "@/lib/utils"; @@ -16,6 +14,8 @@ import { StatusCodes } from "http-status-codes"; import { headers } from "next/headers"; import { NextResponse } from "next/server"; import { z } from "zod"; +import { createMessageStream } from "../route"; +import { InferUIMessageChunk, UITools, UIDataTypes, UIMessage } from "ai"; const logger = createLogger('chat-blocking-api'); @@ -26,36 +26,15 @@ const logger = createLogger('chat-blocking-api'); const blockingChatRequestSchema = z.object({ // The question to ask about the codebase question: z.string().min(1, "Question is required"), - // Optional: filter to specific repositories (by name) - repos: z.array(z.string()).optional(), - // Optional: specify a language model (defaults to first configured model) - languageModel: z.object({ - provider: z.string(), - model: z.string(), - displayName: z.string().optional(), - }).optional(), }); /** * Response schema for the blocking chat API. 
*/ interface BlockingChatResponse { - // The agent's final answer (markdown format) answer: string; - // ID of the persisted chat session chatId: string; - // URL to view the chat in the web UI chatUrl: string; - // Files the agent referenced during research - sources: Source[]; - // Metadata about the response - metadata: { - totalTokens: number; - inputTokens: number; - outputTokens: number; - totalResponseTimeMs: number; - modelName: string; - }; } /** @@ -70,12 +49,12 @@ interface BlockingChatResponse { export async function POST(request: Request) { const requestBody = await request.json(); const parsed = await blockingChatRequestSchema.safeParseAsync(requestBody); - + if (!parsed.success) { return serviceErrorResponse(requestBodySchemaValidationError(parsed.error)); } - const { question, repos, languageModel: requestedLanguageModel } = parsed.data; + const { question } = parsed.data; const response: BlockingChatResponse | ServiceError = await sew(() => withOptionalAuthV2(async ({ org, user, prisma }) => { @@ -89,64 +68,13 @@ export async function POST(request: Request) { } satisfies ServiceError; } - // Select the language model to use - let languageModelConfig = configuredModels[0]; // Default to first configured model - - if (requestedLanguageModel) { - const requested = requestedLanguageModel as LanguageModelInfo; - const found = configuredModels.find( - (model) => getLanguageModelKey(model) === getLanguageModelKey(requested) - ); - if (!found) { - return { - statusCode: StatusCodes.BAD_REQUEST, - errorCode: ErrorCode.INVALID_REQUEST_BODY, - message: `Language model ${requested.model} is not configured.`, - } satisfies ServiceError; - } - languageModelConfig = found; - } + // @todo: we should probably have a option of passing the language model + // into the request body. For now, just use the first configured model. 
+ const languageModelConfig = configuredModels[0]; - const { model, providerOptions } = await _getAISDKLanguageModelAndOptions(languageModelConfig); const modelName = languageModelConfig.displayName ?? languageModelConfig.model; - // Determine which repos to search - let searchScopeRepoNames: string[]; - - if (repos && repos.length > 0) { - // Use the provided repos filter - // Validate that these repos exist and the user has access - const validRepos = await prisma.repo.findMany({ - where: { - orgId: org.id, - name: { - in: repos, - }, - }, - select: { name: true }, - }); - - searchScopeRepoNames = validRepos.map(r => r.name); - - if (searchScopeRepoNames.length === 0) { - return { - statusCode: StatusCodes.BAD_REQUEST, - errorCode: ErrorCode.INVALID_REQUEST_BODY, - message: "None of the specified repositories were found or accessible.", - } satisfies ServiceError; - } - } else { - // Search all repos the user has access to - const allRepos = await prisma.repo.findMany({ - where: { - orgId: org.id, - }, - select: { name: true }, - }); - searchScopeRepoNames = allRepos.map(r => r.name); - } - // Create a new chat session const chat = await prisma.chat.create({ data: { @@ -157,92 +85,76 @@ export async function POST(request: Request) { }, }); - const traceId = randomUUID(); - // Run the agent to completion - logger.info(`Starting blocking agent for chat ${chat.id}`, { + logger.debug(`Starting blocking agent for chat ${chat.id}`, { chatId: chat.id, question: question.substring(0, 100), - repoCount: searchScopeRepoNames.length, model: modelName, }); - const agentResult = await runAgentBlocking({ - model, - providerOptions, - searchScopeRepoNames, - inputMessages: [{ role: 'user', content: question }], - inputSources: [], - traceId, - }); - - // Extract the answer (removing the answer tag if present) - let answer = agentResult.text; - if (answer.startsWith(ANSWER_TAG)) { - answer = answer.slice(ANSWER_TAG.length).trim(); - } - - // Convert to portable markdown 
(replaces @file: references with markdown links) - const portableAnswer = convertLLMOutputToPortableMarkdown(answer); - - // Build the chat URL - const headersList = await headers(); - const baseUrl = getBaseUrl(headersList); - const chatUrl = `${baseUrl}/${org.domain}/chat/${chat.id}`; - - // Create the message history for persistence + // Create the initial user message const userMessage: SBChatMessage = { id: randomUUID(), role: 'user', parts: [{ type: 'text', text: question }], }; - const assistantMessage: SBChatMessage = { - id: randomUUID(), - role: 'assistant', - parts: [ - { type: 'text', text: agentResult.text }, - // Include sources as data parts - ...agentResult.sources.map((source) => ({ - type: 'data-source' as const, - data: source, - })), - ], - metadata: { - totalTokens: agentResult.usage.totalTokens, - totalInputTokens: agentResult.usage.inputTokens, - totalOutputTokens: agentResult.usage.outputTokens, - totalResponseTimeMs: agentResult.responseTimeMs, - modelName, - traceId, + // We'll capture the final messages and usage from the stream + let finalMessages: SBChatMessage[] = []; + + const stream = await createMessageStream({ + messages: [userMessage], + selectedSearchScopes: [], + model, + modelName, + modelProviderOptions: providerOptions, + orgId: org.id, + prisma, + onFinish: async ({ messages }) => { + finalMessages = messages; }, - }; + }) + + await Promise.all([ + // Consume the stream fully to trigger onFinish + blockStreamUntilFinish(stream), + // Generate and update the chat name + generateAndUpdateChatNameFromMessage({ + chatId: chat.id, + languageModelId: languageModelConfig.model, + message: question, + }) + ]); // Persist the messages to the chat await updateChatMessages({ chatId: chat.id, - messages: [userMessage, assistantMessage], + messages: finalMessages, }); - logger.info(`Completed blocking agent for chat ${chat.id}`, { + // Extract the answer text from the assistant message + const assistantMessage = finalMessages.find(m => 
m.role === 'assistant'); + const answerPart = assistantMessage + ? getAnswerPartFromAssistantMessage(assistantMessage, false) + : undefined; + const answerText = answerPart?.text ?? ''; + + // Convert to portable markdown (replaces @file: references with markdown links) + const portableAnswer = convertLLMOutputToPortableMarkdown(answerText); + + // Build the chat URL + const headersList = await headers(); + const baseUrl = getBaseUrl(headersList); + const chatUrl = `${baseUrl}/${org.domain}/chat/${chat.id}`; + + logger.debug(`Completed blocking agent for chat ${chat.id}`, { chatId: chat.id, - responseTimeMs: agentResult.responseTimeMs, - totalTokens: agentResult.usage.totalTokens, - sourceCount: agentResult.sources.length, }); return { answer: portableAnswer, chatId: chat.id, chatUrl, - sources: agentResult.sources, - metadata: { - totalTokens: agentResult.usage.totalTokens, - inputTokens: agentResult.usage.inputTokens, - outputTokens: agentResult.usage.outputTokens, - totalResponseTimeMs: agentResult.responseTimeMs, - modelName, - }, } satisfies BlockingChatResponse; }) ); @@ -251,7 +163,13 @@ export async function POST(request: Request) { return serviceErrorResponse(response); } - console.log(response); - return NextResponse.json(response); } + +const blockStreamUntilFinish = async >(stream: ReadableStream>) => { + const reader = stream.getReader(); + while (true as const) { + const { done } = await reader.read(); + if (done) break; + } +} \ No newline at end of file diff --git a/packages/web/src/app/api/(server)/chat/route.ts b/packages/web/src/app/api/(server)/chat/route.ts index 1594084d..f554595b 100644 --- a/packages/web/src/app/api/(server)/chat/route.ts +++ b/packages/web/src/app/api/(server)/chat/route.ts @@ -18,6 +18,7 @@ import { JSONValue, ModelMessage, StreamTextResult, + UIMessageStreamOnFinishCallback, UIMessageStreamOptions, UIMessageStreamWriter } from "ai"; @@ -86,15 +87,24 @@ export const POST = apiHandler(async (req: NextRequest) => { const { 
model, providerOptions } = await _getAISDKLanguageModelAndOptions(languageModelConfig); - return createMessageStreamResponse({ + const stream = await createMessageStream({ messages, - id, selectedSearchScopes, model, modelName: languageModelConfig.displayName ?? languageModelConfig.model, modelProviderOptions: providerOptions, orgId: org.id, prisma, + onFinish: async ({ messages }) => { + await updateChatMessages({ + chatId: id, + messages + }); + }, + }); + + return createUIMessageStreamResponse({ + stream, }); }) ) @@ -118,24 +128,24 @@ const mergeStreamAsync = async (stream: StreamTextResult, writer: UIMe interface CreateMessageStreamResponseProps { messages: SBChatMessage[]; - id: string; selectedSearchScopes: SearchScope[]; model: AISDKLanguageModelV2; modelName: string; modelProviderOptions?: Record>; orgId: number; prisma: PrismaClient; + onFinish: UIMessageStreamOnFinishCallback; } -const createMessageStreamResponse = async ({ +export const createMessageStream = async ({ messages, - id, selectedSearchScopes, model, modelName, modelProviderOptions, orgId, prisma, + onFinish, }: CreateMessageStreamResponseProps) => { const latestMessage = messages[messages.length - 1]; const sources = latestMessage.parts @@ -241,17 +251,10 @@ const createMessageStreamResponse = async ({ }, onError: errorHandler, originalMessages: messages, - onFinish: async ({ messages }) => { - await updateChatMessages({ - chatId: id, - messages - }); - }, + onFinish, }); - return createUIMessageStreamResponse({ - stream, - }); + return stream; }; const errorHandler = (error: unknown) => { diff --git a/packages/web/src/features/chat/agent.ts b/packages/web/src/features/chat/agent.ts index 75314aa2..b0d1f464 100644 --- a/packages/web/src/features/chat/agent.ts +++ b/packages/web/src/features/chat/agent.ts @@ -1,21 +1,22 @@ -import { env } from "@sourcebot/shared"; -import { env as clientEnv } from "@sourcebot/shared/client"; import { getFileSource } from '@/features/git'; import { 
isServiceError } from "@/lib/utils"; import { ProviderOptions } from "@ai-sdk/provider-utils"; -import { createLogger } from "@sourcebot/shared"; -import { generateText, LanguageModel, ModelMessage, StopCondition, streamText } from "ai"; +import { createLogger, env } from "@sourcebot/shared"; +import { env as clientEnv } from "@sourcebot/shared/client"; +import { LanguageModel, ModelMessage, StopCondition, streamText } from "ai"; import { ANSWER_TAG, FILE_REFERENCE_PREFIX, toolNames } from "./constants"; -import { createCodeSearchTool, findSymbolDefinitionsTool, findSymbolReferencesTool, readFilesTool, searchReposTool, listAllReposTool } from "./tools"; -import { FileSource, Source } from "./types"; +import { createCodeSearchTool, findSymbolDefinitionsTool, findSymbolReferencesTool, listAllReposTool, readFilesTool, searchReposTool } from "./tools"; +import { Source } from "./types"; import { addLineNumbers, fileReferenceToString } from "./utils"; +import _dedent from "dedent"; + +const dedent = _dedent.withOptions({ alignValues: true }); const logger = createLogger('chat-agent'); interface AgentOptions { model: LanguageModel; providerOptions?: ProviderOptions; - headers?: Record; searchScopeRepoNames: string[]; inputMessages: ModelMessage[]; inputSources: Source[]; @@ -23,12 +24,6 @@ interface AgentOptions { traceId: string; } -// If the agent exceeds the step count, then we will stop. -// eslint-disable-next-line @typescript-eslint/no-explicit-any -const stepCountIsGTE = (stepCount: number): StopCondition => { - return ({ steps }) => steps.length >= stepCount; -} - export const createAgentStream = async ({ model, providerOptions, @@ -38,15 +33,41 @@ export const createAgentStream = async ({ onWriteSource, traceId, }: AgentOptions) => { - const baseSystemPrompt = createBaseSystemPrompt({ - searchScopeRepoNames, + // For every file source, resolve the source code so that we can include it in the system prompt. 
+ const fileSources = inputSources.filter((source) => source.type === 'file'); + const resolvedFileSources = ( + await Promise.all(fileSources.map(async (source) => { + const fileSource = await getFileSource({ + path: source.path, + repo: source.repo, + ref: source.revision, + }); + + if (isServiceError(fileSource)) { + logger.error("Error fetching file source:", fileSource); + return undefined; + } + + return { + path: fileSource.path, + source: fileSource.source, + repo: fileSource.repo, + language: fileSource.language, + revision: source.revision, + }; + })) + ).filter((source) => source !== undefined); + + const systemPrompt = createPrompt({ + repos: searchScopeRepoNames, + files: resolvedFileSources, }); const stream = streamText({ model, providerOptions, - system: baseSystemPrompt, messages: inputMessages, + system: systemPrompt, tools: { [toolNames.searchCode]: createCodeSearchTool(searchScopeRepoNames), [toolNames.readFiles]: readFilesTool, @@ -55,49 +76,14 @@ export const createAgentStream = async ({ [toolNames.searchRepos]: searchReposTool, [toolNames.listAllRepos]: listAllReposTool, }, - prepareStep: async ({ stepNumber }) => { - // The first step attaches any mentioned sources to the system prompt. - if (stepNumber === 0 && inputSources.length > 0) { - const fileSources = inputSources.filter((source) => source.type === 'file'); - - const resolvedFileSources = ( - await Promise.all(fileSources.map(resolveFileSource))) - .filter((source) => source !== undefined) - - const fileSourcesSystemPrompt = await createFileSourcesSystemPrompt({ - files: resolvedFileSources - }); - - return { - system: `${baseSystemPrompt}\n\n${fileSourcesSystemPrompt}` - } - } - - if (stepNumber === env.SOURCEBOT_CHAT_MAX_STEP_COUNT - 1) { - return { - system: `**CRITICAL**: You have reached the maximum number of steps!! YOU MUST PROVIDE YOUR FINAL ANSWER NOW. 
DO NOT KEEP RESEARCHING.\n\n${answerInstructions}`, - activeTools: [], - } - } - - return undefined; - }, temperature: env.SOURCEBOT_CHAT_MODEL_TEMPERATURE, stopWhen: [ stepCountIsGTE(env.SOURCEBOT_CHAT_MAX_STEP_COUNT), ], - toolChoice: "auto", // Let the model decide when to use tools + toolChoice: "auto", onStepFinish: ({ toolResults }) => { - // This takes care of extracting any sources that the LLM has seen as part of - // the tool calls it made. toolResults.forEach(({ toolName, output, dynamic }) => { - // we don't care about dynamic tool results here. - if (dynamic) { - return; - } - - if (isServiceError(output)) { - // is there something we want to do here? + if (dynamic || isServiceError(output)) { return; } @@ -110,10 +96,9 @@ export const createAgentStream = async ({ path: file.path, revision: file.revision, name: file.path.split('/').pop() ?? file.path, - }) - }) - } - else if (toolName === toolNames.searchCode) { + }); + }); + } else if (toolName === toolNames.searchCode) { output.files.forEach((file) => { onWriteSource({ type: 'file', @@ -122,10 +107,9 @@ export const createAgentStream = async ({ path: file.fileName, revision: file.revision, name: file.fileName.split('/').pop() ?? file.fileName, - }) - }) - } - else if (toolName === toolNames.findSymbolDefinitions || toolName === toolNames.findSymbolReferences) { + }); + }); + } else if (toolName === toolNames.findSymbolDefinitions || toolName === toolNames.findSymbolReferences) { output.forEach((file) => { onWriteSource({ type: 'file', @@ -134,12 +118,11 @@ export const createAgentStream = async ({ path: file.fileName, revision: file.revision, name: file.fileName.split('/').pop() ?? file.fileName, - }) - }) + }); + }); } - }) + }); }, - // Only enable langfuse traces in cloud environments. 
experimental_telemetry: { isEnabled: clientEnv.NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT !== undefined, metadata: { @@ -154,277 +137,92 @@ export const createAgentStream = async ({ return stream; } -interface BaseSystemPromptOptions { - searchScopeRepoNames: string[]; -} - -export const createBaseSystemPrompt = ({ - searchScopeRepoNames, -}: BaseSystemPromptOptions) => { - return ` -You are a powerful agentic AI code assistant built into Sourcebot, the world's best code-intelligence platform. Your job is to help developers understand and navigate their large codebases. - - -Your workflow has two distinct phases: - -**Phase 1: Research & Analysis** -- Analyze the user's question and determine what context you need -- Use available tools to gather code, search repositories, find references, etc. -- Think through the problem and collect all relevant information -- Do NOT provide partial answers or explanations during this phase - -**Phase 2: Structured Response** -- **MANDATORY**: You MUST always enter this phase and provide a structured markdown response, regardless of whether phase 1 was completed or interrupted -- Provide your final response based on whatever context you have available -- Always format your response according to the required response format below - - - -The user has selected the following repositories for analysis: -${searchScopeRepoNames.map(repo => `- ${repo}`).join('\n')} - - - -During the research phase, use the tools available to you to gather comprehensive context before answering. Always explain why you're using each tool. Depending on the user's question, you may need to use multiple tools. If the question is vague, ask the user for more information. - - -${answerInstructions} -`; -} - -const answerInstructions = ` - -When you have sufficient context, output your answer as a structured markdown response. 
- -**Required Response Format:** -- **CRITICAL**: You MUST always prefix your answer with a \`${ANSWER_TAG}\` tag at the very top of your response -- **CRITICAL**: You MUST provide your complete response in markdown format with embedded code references -- **CODE REFERENCE REQUIREMENT**: Whenever you mention, discuss, or refer to ANY specific part of the code (files, functions, variables, methods, classes, imports, etc.), you MUST immediately follow with a code reference using the format \`${fileReferenceToString({ repo: 'repository', path: 'filename'})}\` or \`${fileReferenceToString({ repo: 'repository', path: 'filename', range: { startLine: 1, endLine: 10 } })}\` (where the numbers are the start and end line numbers of the code snippet). This includes: - - Files (e.g., "The \`auth.ts\` file" → must include \`${fileReferenceToString({ repo: 'repository', path: 'auth.ts' })}\`) - - Function names (e.g., "The \`getRepos()\` function" → must include \`${fileReferenceToString({ repo: 'repository', path: 'auth.ts', range: { startLine: 15, endLine: 20 } })}\`) - - Variable names (e.g., "The \`suggestionQuery\` variable" → must include \`${fileReferenceToString({ repo: 'repository', path: 'search.ts', range: { startLine: 42, endLine: 42 } })}\`) - - Any code snippet or line you're explaining - - Class names, method calls, imports, etc. -- Some examples of both correct and incorrect code references: - - Correct: @file:{repository::path/to/file.ts} - - Correct: @file:{repository::path/to/file.ts:10-15} - - Incorrect: @file{repository::path/to/file.ts} (missing colon) - - Incorrect: @file:repository::path/to/file.ts (missing curly braces) - - Incorrect: @file:{repository::path/to/file.ts:10-25,30-35} (multiple ranges not supported) - - Incorrect: @file:{path/to/file.ts} (missing repository) -- Be clear and very concise. Use bullet points where appropriate -- Do NOT explain code without providing the exact location reference. 
Every code mention requires a corresponding \`${FILE_REFERENCE_PREFIX}\` reference -- If you cannot provide a code reference for something you're discussing, do not mention that specific code element -- Always prefer to use \`${FILE_REFERENCE_PREFIX}\` over \`\`\`code\`\`\` blocks. - -**Example answer structure:** -\`\`\`markdown -${ANSWER_TAG} -Authentication in Sourcebot is built on NextAuth.js with a session-based approach using JWT tokens and Prisma as the database adapter ${fileReferenceToString({ repo: 'github.com/sourcebot-dev/sourcebot', path: 'auth.ts', range: { startLine: 135, endLine: 140 } })}. The system supports multiple authentication providers and implements organization-based authorization with role-defined permissions. -\`\`\` - - -`; - -interface FileSourcesSystemPromptOptions { - files: { +const createPrompt = ({ + files, + repos, +}: { + files?: { path: string; source: string; repo: string; language: string; revision: string; - }[]; -} - -const createFileSourcesSystemPrompt = async ({ files }: FileSourcesSystemPromptOptions) => { - return ` -The user has mentioned the following files, which are automatically included for analysis. - -${files.map(file => ` -${addLineNumbers(file.source)} -`).join('\n\n')} - `.trim(); -} - -const resolveFileSource = async ({ path, repo, revision }: FileSource) => { - const fileSource = await getFileSource({ - path, - repo, - ref: revision, - }); - - if (isServiceError(fileSource)) { - // @todo: handle this - logger.error("Error fetching file source:", fileSource) - return undefined; - } - - return { - path, - source: fileSource.source, - repo, - language: fileSource.language, - revision, - } + }[], + repos: string[], +}) => { + return dedent` + You are a powerful agentic AI code assistant built into Sourcebot, the world's best code-intelligence platform. Your job is to help developers understand and navigate their large codebases. 
+ + + Your workflow has two distinct phases: + + **Phase 1: Research & Analysis** + - Analyze the user's question and determine what context you need + - Use available tools to gather code, search repositories, find references, etc. + - Think through the problem and collect all relevant information + - Do NOT provide partial answers or explanations during this phase + + **Phase 2: Structured Response** + - **MANDATORY**: You MUST always enter this phase and provide a structured markdown response, regardless of whether phase 1 was completed or interrupted + - Provide your final response based on whatever context you have available + - Always format your response according to the required response format below + + + + During the research phase, use the tools available to you to gather comprehensive context before answering. Always explain why you're using each tool. Depending on the user's question, you may need to use multiple tools. If the question is vague, ask the user for more information. + + + + The user has selected the following repositories for analysis: + ${repos.map(scope => `- ${scope}`).join('\n')} + + + ${files ? dedent` + + The user has mentioned the following files, which are automatically included for analysis. + + ${files?.map(file => ` + ${addLineNumbers(file.source)} + `).join('\n\n')} + + `: ''} + + + When you have sufficient context, output your answer as a structured markdown response. 
+ + **Required Response Format:** + - **CRITICAL**: You MUST always prefix your answer with a \`${ANSWER_TAG}\` tag at the very top of your response + - **CRITICAL**: You MUST provide your complete response in markdown format with embedded code references + - **CODE REFERENCE REQUIREMENT**: Whenever you mention, discuss, or refer to ANY specific part of the code (files, functions, variables, methods, classes, imports, etc.), you MUST immediately follow with a code reference using the format \`${fileReferenceToString({ repo: 'repository', path: 'filename' })}\` or \`${fileReferenceToString({ repo: 'repository', path: 'filename', range: { startLine: 1, endLine: 10 } })}\` (where the numbers are the start and end line numbers of the code snippet). This includes: + - Files (e.g., "The \`auth.ts\` file" → must include \`${fileReferenceToString({ repo: 'repository', path: 'auth.ts' })}\`) + - Function names (e.g., "The \`getRepos()\` function" → must include \`${fileReferenceToString({ repo: 'repository', path: 'auth.ts', range: { startLine: 15, endLine: 20 } })}\`) + - Variable names (e.g., "The \`suggestionQuery\` variable" → must include \`${fileReferenceToString({ repo: 'repository', path: 'search.ts', range: { startLine: 42, endLine: 42 } })}\`) + - Any code snippet or line you're explaining + - Class names, method calls, imports, etc. + - Some examples of both correct and incorrect code references: + - Correct: @file:{repository::path/to/file.ts} + - Correct: @file:{repository::path/to/file.ts:10-15} + - Incorrect: @file{repository::path/to/file.ts} (missing colon) + - Incorrect: @file:repository::path/to/file.ts (missing curly braces) + - Incorrect: @file:{repository::path/to/file.ts:10-25,30-35} (multiple ranges not supported) + - Incorrect: @file:{path/to/file.ts} (missing repository) + - Be clear and very concise. Use bullet points where appropriate + - Do NOT explain code without providing the exact location reference. 
Every code mention requires a corresponding \`${FILE_REFERENCE_PREFIX}\` reference + - If you cannot provide a code reference for something you're discussing, do not mention that specific code element + - Always prefer to use \`${FILE_REFERENCE_PREFIX}\` over \`\`\`code\`\`\` blocks. + + **Example answer structure:** + \`\`\`markdown + ${ANSWER_TAG} + Authentication in Sourcebot is built on NextAuth.js with a session-based approach using JWT tokens and Prisma as the database adapter ${fileReferenceToString({ repo: 'github.com/sourcebot-dev/sourcebot', path: 'auth.ts', range: { startLine: 135, endLine: 140 } })}. The system supports multiple authentication providers and implements organization-based authorization with role-defined permissions. + \`\`\` + + + ` } -// ============================================================================ -// Blocking Agent Execution (for MCP and other non-streaming use cases) -// ============================================================================ - -interface BlockingAgentOptions { - model: LanguageModel; - providerOptions?: ProviderOptions; - searchScopeRepoNames: string[]; - inputMessages: ModelMessage[]; - inputSources: Source[]; - traceId: string; -} - -export interface BlockingAgentResult { - text: string; - sources: Source[]; - usage: { - inputTokens: number; - outputTokens: number; - totalTokens: number; - }; - responseTimeMs: number; +// If the agent exceeds the step count, then we will stop. +// eslint-disable-next-line @typescript-eslint/no-explicit-any +const stepCountIsGTE = (stepCount: number): StopCondition => { + return ({ steps }) => steps.length >= stepCount; } - -/** - * Runs the chat agent in blocking mode, waiting for the complete response. - * This is used by the MCP server and other integrations that don't support streaming. 
- */ -export const runAgentBlocking = async ({ - model, - providerOptions, - inputMessages, - inputSources, - searchScopeRepoNames, - traceId, -}: BlockingAgentOptions): Promise => { - const startTime = Date.now(); - const collectedSources: Source[] = []; - - const onWriteSource = (source: Source) => { - // Deduplicate sources by checking if we already have this file - const exists = collectedSources.some( - (s) => s.type === source.type && - s.type === 'file' && source.type === 'file' && - s.repo === source.repo && - s.path === source.path - ); - if (!exists) { - collectedSources.push(source); - } - }; - - const baseSystemPrompt = createBaseSystemPrompt({ searchScopeRepoNames }); - - // Resolve any input file sources for the first step - let systemPromptWithSources = baseSystemPrompt; - if (inputSources.length > 0) { - const fileSources = inputSources.filter((source) => source.type === 'file'); - const resolvedFileSources = ( - await Promise.all(fileSources.map(resolveFileSource)) - ).filter((source) => source !== undefined); - - if (resolvedFileSources.length > 0) { - const fileSourcesSystemPrompt = await createFileSourcesSystemPrompt({ - files: resolvedFileSources - }); - systemPromptWithSources = `${baseSystemPrompt}\n\n${fileSourcesSystemPrompt}`; - } - } - - const result = await generateText({ - model, - providerOptions, - system: systemPromptWithSources, - messages: inputMessages, - tools: { - [toolNames.searchCode]: createCodeSearchTool(searchScopeRepoNames), - [toolNames.readFiles]: readFilesTool, - [toolNames.findSymbolReferences]: findSymbolReferencesTool, - [toolNames.findSymbolDefinitions]: findSymbolDefinitionsTool, - [toolNames.searchRepos]: searchReposTool, - [toolNames.listAllRepos]: listAllReposTool, - }, - temperature: env.SOURCEBOT_CHAT_MODEL_TEMPERATURE, - stopWhen: [ - stepCountIsGTE(env.SOURCEBOT_CHAT_MAX_STEP_COUNT), - ], - toolChoice: "auto", - onStepFinish: ({ toolResults }) => { - // Extract sources from tool results (same logic as 
streaming version) - toolResults.forEach(({ toolName, output, dynamic }) => { - // We don't care about dynamic tool results here. - if (dynamic) { - return; - } - - if (isServiceError(output)) { - return; - } - - if (toolName === toolNames.readFiles) { - (output as { path: string; repository: string; language: string; revision: string }[]).forEach((file) => { - onWriteSource({ - type: 'file', - language: file.language, - repo: file.repository, - path: file.path, - revision: file.revision, - name: file.path.split('/').pop() ?? file.path, - }); - }); - } - else if (toolName === toolNames.searchCode) { - const searchOutput = output as { files: { language: string; repository: string; fileName: string; revision: string }[] }; - searchOutput.files.forEach((file) => { - onWriteSource({ - type: 'file', - language: file.language, - repo: file.repository, - path: file.fileName, - revision: file.revision, - name: file.fileName.split('/').pop() ?? file.fileName, - }); - }); - } - else if (toolName === toolNames.findSymbolDefinitions || toolName === toolNames.findSymbolReferences) { - (output as { language: string; repository: string; fileName: string; revision: string }[]).forEach((file) => { - onWriteSource({ - type: 'file', - language: file.language, - repo: file.repository, - path: file.fileName, - revision: file.revision, - name: file.fileName.split('/').pop() ?? file.fileName, - }); - }); - } - }); - }, - experimental_telemetry: { - isEnabled: clientEnv.NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT !== undefined, - metadata: { - langfuseTraceId: traceId, - }, - }, - }); - - const responseTimeMs = Date.now() - startTime; - - return { - text: result.text, - sources: collectedSources, - usage: { - inputTokens: result.totalUsage.inputTokens ?? 0, - outputTokens: result.totalUsage.outputTokens ?? 0, - totalTokens: result.totalUsage.totalTokens ?? 
0, - }, - responseTimeMs, - }; -} \ No newline at end of file diff --git a/yarn.lock b/yarn.lock index 892f4f46..3bf7a2bf 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8379,6 +8379,7 @@ __metadata: codemirror-lang-zig: "npm:^0.1.0" cross-env: "npm:^7.0.3" date-fns: "npm:^4.1.0" + dedent: "npm:^1.7.1" embla-carousel-auto-scroll: "npm:^8.3.0" embla-carousel-react: "npm:^8.3.0" escape-string-regexp: "npm:^5.0.0" From 865d88e2526af34c60ea9516a331deafd7a57b43 Mon Sep 17 00:00:00 2001 From: bkellam Date: Sat, 31 Jan 2026 18:18:06 -0800 Subject: [PATCH 03/13] add back selection --- packages/mcp/src/index.ts | 20 ++------ packages/mcp/src/schemas.ts | 16 +++---- .../app/api/(server)/chat/blocking/route.ts | 47 +++++++++++++++---- .../web/src/app/api/(server)/chat/route.ts | 7 ++- packages/web/src/features/chat/agent.ts | 18 +++---- 5 files changed, 63 insertions(+), 45 deletions(-) diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index b8a9f781..5b96238d 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -8,8 +8,8 @@ import escapeStringRegexp from 'escape-string-regexp'; import { z } from 'zod'; import { askCodebase, getFileSource, listCommits, listRepos, search } from './client.js'; import { env, numberSchema } from './env.js'; -import { fileSourceRequestSchema, listCommitsQueryParamsSchema, listReposQueryParamsSchema } from './schemas.js'; -import { FileSourceRequest, ListCommitsQueryParamsSchema, ListReposQueryParams, TextContent } from './types.js'; +import { askCodebaseRequestSchema, fileSourceRequestSchema, listCommitsQueryParamsSchema, listReposQueryParamsSchema } from './schemas.js'; +import { AskCodebaseRequest, FileSourceRequest, ListCommitsQueryParamsSchema, ListReposQueryParams, TextContent } from './types.js'; const dedent = _dedent.withOptions({ alignValues: true }); @@ -252,15 +252,9 @@ server.tool( This is a blocking operation that may take 30-60+ seconds for complex questions as the agent researches the codebase. 
`, - { - question: z.string().describe("The question to ask about the codebase."), - }, - async ({ - question, - }) => { - const response = await askCodebase({ - question, - }); + askCodebaseRequestSchema.shape, + async (request: AskCodebaseRequest) => { + const response = await askCodebase(request); // Format the response with the answer and a link to the chat const formattedResponse = dedent` @@ -268,10 +262,6 @@ server.tool( --- **View full research session:** ${response.chatUrl} - - **Sources referenced:** ${response.sources.length} files - **Response time:** ${(response.metadata.totalResponseTimeMs / 1000).toFixed(1)}s - **Model:** ${response.metadata.modelName} `; return { diff --git a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index deb269b8..a2b49262 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -274,7 +274,13 @@ export const listCommitsResponseSchema = z.array(z.object({ })); export const askCodebaseRequestSchema = z.object({ - question: z.string().describe("The question to ask about the codebase"), + query: z + .string() + .describe("The query to ask about the codebase."), + repos: z + .array(z.string()) + .optional() + .describe("The repositories that are accessible to the agent during the chat. 
If not provided, all repositories are accessible."), }); export const sourceSchema = z.object({ @@ -290,12 +296,4 @@ export const askCodebaseResponseSchema = z.object({ answer: z.string().describe("The agent's final answer in markdown format"), chatId: z.string().describe("ID of the persisted chat session"), chatUrl: z.string().describe("URL to view the chat in the web UI"), - sources: z.array(sourceSchema).describe("Files the agent referenced during research"), - metadata: z.object({ - totalTokens: z.number(), - inputTokens: z.number(), - outputTokens: z.number(), - totalResponseTimeMs: z.number(), - modelName: z.string(), - }).describe("Metadata about the response"), }); diff --git a/packages/web/src/app/api/(server)/chat/blocking/route.ts b/packages/web/src/app/api/(server)/chat/blocking/route.ts index 8fba3dfd..4c24cb19 100644 --- a/packages/web/src/app/api/(server)/chat/blocking/route.ts +++ b/packages/web/src/app/api/(server)/chat/blocking/route.ts @@ -1,9 +1,9 @@ import { sew } from "@/actions"; import { _getConfiguredLanguageModelsFull, _getAISDKLanguageModelAndOptions, updateChatMessages, generateAndUpdateChatNameFromMessage } from "@/features/chat/actions"; -import { SBChatMessage, Source } from "@/features/chat/types"; +import { SBChatMessage, SearchScope } from "@/features/chat/types"; import { convertLLMOutputToPortableMarkdown, getAnswerPartFromAssistantMessage } from "@/features/chat/utils"; import { ErrorCode } from "@/lib/errorCodes"; -import { requestBodySchemaValidationError, ServiceError, serviceErrorResponse } from "@/lib/serviceError"; +import { requestBodySchemaValidationError, ServiceError, ServiceErrorException, serviceErrorResponse } from "@/lib/serviceError"; import { isServiceError } from "@/lib/utils"; import { getBaseUrl } from "@/lib/utils.server"; import { withOptionalAuthV2 } from "@/withAuthV2"; @@ -24,8 +24,13 @@ const logger = createLogger('chat-blocking-api'); * This is a simpler interface designed for MCP and other programmatic 
integrations. */ const blockingChatRequestSchema = z.object({ - // The question to ask about the codebase - question: z.string().min(1, "Question is required"), + query: z + .string() + .describe("The query to ask about the codebase."), + repos: z + .array(z.string()) + .optional() + .describe("The repositories that are accessible to the agent during the chat. If not provided, all repositories are accessible."), }); /** @@ -54,7 +59,7 @@ export async function POST(request: Request) { return serviceErrorResponse(requestBodySchemaValidationError(parsed.error)); } - const { question } = parsed.data; + const { query, repos = [] } = parsed.data; const response: BlockingChatResponse | ServiceError = await sew(() => withOptionalAuthV2(async ({ org, user, prisma }) => { @@ -88,7 +93,7 @@ export async function POST(request: Request) { // Run the agent to completion logger.debug(`Starting blocking agent for chat ${chat.id}`, { chatId: chat.id, - question: question.substring(0, 100), + query: query.substring(0, 100), model: modelName, }); @@ -96,15 +101,39 @@ export async function POST(request: Request) { const userMessage: SBChatMessage = { id: randomUUID(), role: 'user', - parts: [{ type: 'text', text: question }], + parts: [{ type: 'text', text: query }], }; + const selectedSearchScopes: SearchScope[] = []; + for (const repo of repos) { + const repoDB = await prisma.repo.findFirst({ + where: { + name: repo, + }, + }); + + if (!repoDB) { + throw new ServiceErrorException({ + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: `Repository '${repo}' not found.`, + }) + } + + selectedSearchScopes.push({ + type: 'repo', + value: repoDB.name, + name: repoDB.displayName ?? repoDB.name.split('/').pop() ?? 
repoDB.name, + codeHostType: repoDB.external_codeHostType, + }) + } + // We'll capture the final messages and usage from the stream let finalMessages: SBChatMessage[] = []; const stream = await createMessageStream({ messages: [userMessage], - selectedSearchScopes: [], + selectedSearchScopes, model, modelName, modelProviderOptions: providerOptions, @@ -122,7 +151,7 @@ export async function POST(request: Request) { generateAndUpdateChatNameFromMessage({ chatId: chat.id, languageModelId: languageModelConfig.model, - message: question, + message: query, }) ]); diff --git a/packages/web/src/app/api/(server)/chat/route.ts b/packages/web/src/app/api/(server)/chat/route.ts index f554595b..0ac70390 100644 --- a/packages/web/src/app/api/(server)/chat/route.ts +++ b/packages/web/src/app/api/(server)/chat/route.ts @@ -184,7 +184,7 @@ export const createMessageStream = async ({ const startTime = new Date(); - const expandedReposArrays = await Promise.all(selectedSearchScopes.map(async (scope) => { + const expandedRepos = (await Promise.all(selectedSearchScopes.map(async (scope) => { if (scope.type === 'repo') { return [scope.value]; } @@ -206,15 +206,14 @@ export const createMessageStream = async ({ } return []; - })); - const expandedRepos = expandedReposArrays.flat(); + }))).flat() const researchStream = await createAgentStream({ model, providerOptions: modelProviderOptions, inputMessages: messageHistory, inputSources: sources, - searchScopeRepoNames: expandedRepos, + selectedRepos: expandedRepos, onWriteSource: (source) => { writer.write({ type: 'data-source', diff --git a/packages/web/src/features/chat/agent.ts b/packages/web/src/features/chat/agent.ts index b0d1f464..46f39d14 100644 --- a/packages/web/src/features/chat/agent.ts +++ b/packages/web/src/features/chat/agent.ts @@ -17,7 +17,7 @@ const logger = createLogger('chat-agent'); interface AgentOptions { model: LanguageModel; providerOptions?: ProviderOptions; - searchScopeRepoNames: string[]; + selectedRepos: string[]; 
inputMessages: ModelMessage[]; inputSources: Source[]; onWriteSource: (source: Source) => void; @@ -29,7 +29,7 @@ export const createAgentStream = async ({ providerOptions, inputMessages, inputSources, - searchScopeRepoNames, + selectedRepos, onWriteSource, traceId, }: AgentOptions) => { @@ -59,7 +59,7 @@ export const createAgentStream = async ({ ).filter((source) => source !== undefined); const systemPrompt = createPrompt({ - repos: searchScopeRepoNames, + repos: selectedRepos, files: resolvedFileSources, }); @@ -69,7 +69,7 @@ export const createAgentStream = async ({ messages: inputMessages, system: systemPrompt, tools: { - [toolNames.searchCode]: createCodeSearchTool(searchScopeRepoNames), + [toolNames.searchCode]: createCodeSearchTool(selectedRepos), [toolNames.readFiles]: readFilesTool, [toolNames.findSymbolReferences]: findSymbolReferencesTool, [toolNames.findSymbolDefinitions]: findSymbolDefinitionsTool, @@ -172,10 +172,12 @@ const createPrompt = ({ During the research phase, use the tools available to you to gather comprehensive context before answering. Always explain why you're using each tool. Depending on the user's question, you may need to use multiple tools. If the question is vague, ask the user for more information. - - The user has selected the following repositories for analysis: - ${repos.map(scope => `- ${scope}`).join('\n')} - + ${repos.length > 0 ? dedent` + + The user has explicitly selected the following repositories for analysis: + ${repos.map(repo => `- ${repo}`).join('\n')} + + ` : ''} ${files ? dedent` From f1e264f639a61383feb797e32aed1ded6919554c Mon Sep 17 00:00:00 2001 From: bkellam Date: Sat, 31 Jan 2026 18:57:47 -0800 Subject: [PATCH 04/13] switch to using pagination api for listing repos. 
Remove search repos tool since it is redundant with list repos --- packages/web/src/app/api/(client)/client.ts | 4 +- .../app/api/(server)/repos/listReposApi.ts | 59 +++++++++++++++++ .../web/src/app/api/(server)/repos/route.ts | 60 +++--------------- packages/web/src/features/chat/agent.ts | 5 +- .../components/chatThread/detailsCard.tsx | 14 +---- ...mponent.tsx => listReposToolComponent.tsx} | 10 +-- .../tools/searchReposToolComponent.tsx | 63 ------------------- packages/web/src/features/chat/constants.ts | 6 +- packages/web/src/features/chat/tools.ts | 62 ++++-------------- packages/web/src/features/chat/types.ts | 5 +- 10 files changed, 97 insertions(+), 191 deletions(-) create mode 100644 packages/web/src/app/api/(server)/repos/listReposApi.ts rename packages/web/src/features/chat/components/chatThread/tools/{listAllReposToolComponent.tsx => listReposToolComponent.tsx} (89%) delete mode 100644 packages/web/src/features/chat/components/chatThread/tools/searchReposToolComponent.tsx diff --git a/packages/web/src/app/api/(client)/client.ts b/packages/web/src/app/api/(client)/client.ts index 7cb2f95c..6d0745e4 100644 --- a/packages/web/src/app/api/(client)/client.ts +++ b/packages/web/src/app/api/(client)/client.ts @@ -38,7 +38,7 @@ export const search = async (body: SearchRequest): Promise => { - const url = new URL("/api/source", window.location.origin); + const url = new URL("/api/source"); for (const [key, value] of Object.entries(queryParams)) { url.searchParams.set(key, value.toString()); } @@ -54,7 +54,7 @@ export const getFileSource = async (queryParams: FileSourceRequest): Promise => { - const url = new URL("/api/repos", window.location.origin); + const url = new URL("/api/repos"); for (const [key, value] of Object.entries(queryParams)) { url.searchParams.set(key, value.toString()); } diff --git a/packages/web/src/app/api/(server)/repos/listReposApi.ts b/packages/web/src/app/api/(server)/repos/listReposApi.ts new file mode 100644 index 00000000..14aa1c95
--- /dev/null +++ b/packages/web/src/app/api/(server)/repos/listReposApi.ts @@ -0,0 +1,59 @@ +import { sew } from "@/actions"; +import { repositoryQuerySchema } from "@/lib/schemas"; +import { ListReposQueryParams } from "@/lib/types"; +import { withOptionalAuthV2 } from "@/withAuthV2"; +import { headers } from "next/headers"; +import { getBaseUrl } from "@/lib/utils.server"; +import { getBrowsePath } from "@/app/[domain]/browse/hooks/utils"; + +export const listRepos = async ({ query, page, perPage, sort, direction }: ListReposQueryParams) => sew(() => + withOptionalAuthV2(async ({ org, prisma }) => { + const skip = (page - 1) * perPage; + const orderByField = sort === 'pushed' ? 'pushedAt' : 'name'; + + const headersList = await headers(); + const baseUrl = getBaseUrl(headersList); + + const [repos, totalCount] = await Promise.all([ + prisma.repo.findMany({ + where: { + orgId: org.id, + ...(query ? { + name: { contains: query, mode: 'insensitive' }, + } : {}), + }, + skip, + take: perPage, + orderBy: { [orderByField]: direction }, + }), + prisma.repo.count({ + where: { + orgId: org.id, + ...(query ? { + name: { contains: query, mode: 'insensitive' }, + } : {}), + }, + }), + ]); + + return { + data: repos.map((repo) => repositoryQuerySchema.parse({ + codeHostType: repo.external_codeHostType, + repoId: repo.id, + repoName: repo.name, + webUrl: `${baseUrl}${getBrowsePath({ + repoName: repo.name, + path: '', + pathType: 'tree', + domain: org.domain, + })}`, + repoDisplayName: repo.displayName ?? undefined, + externalWebUrl: repo.webUrl ?? undefined, + imageUrl: repo.imageUrl ?? undefined, + indexedAt: repo.indexedAt ?? undefined, + pushedAt: repo.pushedAt ?? 
undefined, + })), + totalCount, + }; + }) +) \ No newline at end of file diff --git a/packages/web/src/app/api/(server)/repos/route.ts b/packages/web/src/app/api/(server)/repos/route.ts index c3bb47d3..cbc0e98b 100644 --- a/packages/web/src/app/api/(server)/repos/route.ts +++ b/packages/web/src/app/api/(server)/repos/route.ts @@ -1,12 +1,10 @@ -import { sew } from "@/actions"; -import { getBrowsePath } from "@/app/[domain]/browse/hooks/utils"; import { apiHandler } from "@/lib/apiHandler"; import { buildLinkHeader } from "@/lib/pagination"; -import { listReposQueryParamsSchema, repositoryQuerySchema } from "@/lib/schemas"; +import { listReposQueryParamsSchema } from "@/lib/schemas"; import { queryParamsSchemaValidationError, serviceErrorResponse } from "@/lib/serviceError"; import { isServiceError } from "@/lib/utils"; -import { withOptionalAuthV2 } from "@/withAuthV2"; import { NextRequest } from "next/server"; +import { listRepos } from "./listReposApi"; export const GET = apiHandler(async (request: NextRequest) => { const rawParams = Object.fromEntries( @@ -22,54 +20,14 @@ export const GET = apiHandler(async (request: NextRequest) => { } const { page, perPage, sort, direction, query } = parseResult.data; - const skip = (page - 1) * perPage; - const orderByField = sort === 'pushed' ? 'pushedAt' : 'name'; - const response = await sew(() => - withOptionalAuthV2(async ({ org, prisma }) => { - const [repos, totalCount] = await Promise.all([ - prisma.repo.findMany({ - where: { - orgId: org.id, - ...(query ? { - name: { contains: query, mode: 'insensitive' }, - } : {}), - }, - skip, - take: perPage, - orderBy: { [orderByField]: direction }, - }), - prisma.repo.count({ - where: { - orgId: org.id, - ...(query ? 
{ - name: { contains: query, mode: 'insensitive' }, - } : {}), - }, - }), - ]); - - return { - data: repos.map((repo) => repositoryQuerySchema.parse({ - codeHostType: repo.external_codeHostType, - repoId: repo.id, - repoName: repo.name, - webUrl: `${request.nextUrl.origin}${getBrowsePath({ - repoName: repo.name, - path: '', - pathType: 'tree', - domain: org.domain, - })}`, - repoDisplayName: repo.displayName ?? undefined, - externalWebUrl: repo.webUrl ?? undefined, - imageUrl: repo.imageUrl ?? undefined, - indexedAt: repo.indexedAt ?? undefined, - pushedAt: repo.pushedAt ?? undefined, - })), - totalCount, - }; - }) - ); + const response = await listRepos({ + page, + perPage, + sort, + direction, + query, + }) if (isServiceError(response)) { return serviceErrorResponse(response); diff --git a/packages/web/src/features/chat/agent.ts b/packages/web/src/features/chat/agent.ts index 46f39d14..cbd6a54f 100644 --- a/packages/web/src/features/chat/agent.ts +++ b/packages/web/src/features/chat/agent.ts @@ -5,7 +5,7 @@ import { createLogger, env } from "@sourcebot/shared"; import { env as clientEnv } from "@sourcebot/shared/client"; import { LanguageModel, ModelMessage, StopCondition, streamText } from "ai"; import { ANSWER_TAG, FILE_REFERENCE_PREFIX, toolNames } from "./constants"; -import { createCodeSearchTool, findSymbolDefinitionsTool, findSymbolReferencesTool, listAllReposTool, readFilesTool, searchReposTool } from "./tools"; +import { createCodeSearchTool, findSymbolDefinitionsTool, findSymbolReferencesTool, listReposTool, readFilesTool } from "./tools"; import { Source } from "./types"; import { addLineNumbers, fileReferenceToString } from "./utils"; import _dedent from "dedent"; @@ -73,8 +73,7 @@ export const createAgentStream = async ({ [toolNames.readFiles]: readFilesTool, [toolNames.findSymbolReferences]: findSymbolReferencesTool, [toolNames.findSymbolDefinitions]: findSymbolDefinitionsTool, - [toolNames.searchRepos]: searchReposTool, - [toolNames.listAllRepos]: 
listAllReposTool, + [toolNames.listRepos]: listReposTool, }, temperature: env.SOURCEBOT_CHAT_MODEL_TEMPERATURE, stopWhen: [ diff --git a/packages/web/src/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/features/chat/components/chatThread/detailsCard.tsx index 26d16437..866d9364 100644 --- a/packages/web/src/features/chat/components/chatThread/detailsCard.tsx +++ b/packages/web/src/features/chat/components/chatThread/detailsCard.tsx @@ -13,8 +13,7 @@ import { FindSymbolDefinitionsToolComponent } from './tools/findSymbolDefinition import { FindSymbolReferencesToolComponent } from './tools/findSymbolReferencesToolComponent'; import { ReadFilesToolComponent } from './tools/readFilesToolComponent'; import { SearchCodeToolComponent } from './tools/searchCodeToolComponent'; -import { SearchReposToolComponent } from './tools/searchReposToolComponent'; -import { ListAllReposToolComponent } from './tools/listAllReposToolComponent'; +import { ListReposToolComponent } from './tools/listReposToolComponent'; import { SBChatMessageMetadata, SBChatMessagePart } from '../../types'; import { SearchScopeIcon } from '../searchScopeIcon'; import isEqual from "fast-deep-equal/react"; @@ -184,16 +183,9 @@ const DetailsCardComponent = ({ part={part} /> ) - case 'tool-searchRepos': + case 'tool-listRepos': return ( - - ) - case 'tool-listAllRepos': - return ( - diff --git a/packages/web/src/features/chat/components/chatThread/tools/listAllReposToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/listReposToolComponent.tsx similarity index 89% rename from packages/web/src/features/chat/components/chatThread/tools/listAllReposToolComponent.tsx rename to packages/web/src/features/chat/components/chatThread/tools/listReposToolComponent.tsx index 6c06146c..3639b598 100644 --- a/packages/web/src/features/chat/components/chatThread/tools/listAllReposToolComponent.tsx +++ 
b/packages/web/src/features/chat/components/chatThread/tools/listReposToolComponent.tsx @@ -1,6 +1,6 @@ 'use client'; -import { ListAllReposToolUIPart } from "@/features/chat/tools"; +import { ListReposToolUIPart } from "@/features/chat/tools"; import { isServiceError } from "@/lib/utils"; import { useMemo, useState } from "react"; import { ToolHeader, TreeList } from "./shared"; @@ -8,18 +8,18 @@ import { CodeSnippet } from "@/app/components/codeSnippet"; import { Separator } from "@/components/ui/separator"; import { FolderOpenIcon } from "lucide-react"; -export const ListAllReposToolComponent = ({ part }: { part: ListAllReposToolUIPart }) => { +export const ListReposToolComponent = ({ part }: { part: ListReposToolUIPart }) => { const [isExpanded, setIsExpanded] = useState(false); const label = useMemo(() => { switch (part.state) { case 'input-streaming': - return 'Loading all repositories...'; + return 'Listing repositories...'; case 'output-error': - return '"List all repositories" tool call failed'; + return '"List repositories" tool call failed'; case 'input-available': case 'output-available': - return 'Listed all repositories'; + return 'Listed repositories'; } }, [part]); diff --git a/packages/web/src/features/chat/components/chatThread/tools/searchReposToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/searchReposToolComponent.tsx deleted file mode 100644 index 218cdba4..00000000 --- a/packages/web/src/features/chat/components/chatThread/tools/searchReposToolComponent.tsx +++ /dev/null @@ -1,63 +0,0 @@ -'use client'; - -import { SearchReposToolUIPart } from "@/features/chat/tools"; -import { isServiceError } from "@/lib/utils"; -import { useMemo, useState } from "react"; -import { ToolHeader, TreeList } from "./shared"; -import { CodeSnippet } from "@/app/components/codeSnippet"; -import { Separator } from "@/components/ui/separator"; -import { BookMarkedIcon } from "lucide-react"; - -export const SearchReposToolComponent = ({ 
part }: { part: SearchReposToolUIPart }) => { - const [isExpanded, setIsExpanded] = useState(false); - - const label = useMemo(() => { - switch (part.state) { - case 'input-streaming': - return 'Searching repositories...'; - case 'output-error': - return '"Search repositories" tool call failed'; - case 'input-available': - case 'output-available': - return Searched for repositories: {part.input.query}; - } - }, [part]); - - return ( -
- - {part.state === 'output-available' && isExpanded && ( - <> - {isServiceError(part.output) ? ( - - Failed with the following error: {part.output.message} - - ) : ( - <> - {part.output.length === 0 ? ( - No repositories found - ) : ( - - {part.output.map((repoName, index) => ( -
- - {repoName} -
- ))} -
- )} - - )} - - - )} -
- ) -} diff --git a/packages/web/src/features/chat/constants.ts b/packages/web/src/features/chat/constants.ts index c9da5694..c73da27b 100644 --- a/packages/web/src/features/chat/constants.ts +++ b/packages/web/src/features/chat/constants.ts @@ -14,8 +14,7 @@ export const toolNames = { readFiles: 'readFiles', findSymbolReferences: 'findSymbolReferences', findSymbolDefinitions: 'findSymbolDefinitions', - searchRepos: 'searchRepos', - listAllRepos: 'listAllRepos', + listRepos: 'listRepos', } as const; // These part types are visible in the UI. @@ -26,6 +25,5 @@ export const uiVisiblePartTypes: SBChatMessagePart['type'][] = [ 'tool-readFiles', 'tool-findSymbolDefinitions', 'tool-findSymbolReferences', - 'tool-searchRepos', - 'tool-listAllRepos', + 'tool-listRepos', ] as const; \ No newline at end of file diff --git a/packages/web/src/features/chat/tools.ts b/packages/web/src/features/chat/tools.ts index c932e932..c71fab8e 100644 --- a/packages/web/src/features/chat/tools.ts +++ b/packages/web/src/features/chat/tools.ts @@ -6,8 +6,9 @@ import { FileSourceResponse, getFileSource } from '@/features/git'; import { findSearchBasedSymbolDefinitions, findSearchBasedSymbolReferences } from "../codeNav/api"; import { addLineNumbers, buildSearchQuery } from "./utils"; import { toolNames } from "./constants"; -import { getRepos } from "@/actions"; -import Fuse from "fuse.js"; +import { listReposQueryParamsSchema } from "@/lib/schemas"; +import { ListReposQueryParams } from "@/lib/types"; +import { listRepos } from "@/app/api/(server)/repos/listReposApi"; // @NOTE: When adding a new tool, follow these steps: // 1. Add the tool to the `toolNames` constant in `constants.ts`. @@ -217,58 +218,21 @@ export type SearchCodeToolInput = InferToolInput>; export type SearchCodeToolUIPart = ToolUIPart<{ [toolNames.searchCode]: SearchCodeTool }>; -export const searchReposTool = tool({ - description: `Search for repositories by name using fuzzy search. 
This helps find repositories in the codebase when you know part of their name.`, - inputSchema: z.object({ - query: z.string().describe("The search query to find repositories by name (supports fuzzy matching)"), - limit: z.number().default(10).describe("Maximum number of repositories to return (default: 10)") - }), - execute: async ({ query, limit }) => { - const reposResponse = await getRepos(); - - if (isServiceError(reposResponse)) { - return reposResponse; - } - - // Configure Fuse.js for fuzzy searching - const fuse = new Fuse(reposResponse, { - keys: [ - { name: 'repoName', weight: 0.7 }, - { name: 'repoDisplayName', weight: 0.3 } - ], - threshold: 0.4, // Lower threshold = more strict matching - includeScore: true, - minMatchCharLength: 1, - }); - - const searchResults = fuse.search(query, { limit: limit ?? 10 }); - - searchResults.sort((a, b) => (a.score ?? 0) - (b.score ?? 0)); - - return searchResults.map(({ item }) => item.repoName); - } -}); - -export type SearchReposTool = InferUITool; -export type SearchReposToolInput = InferToolInput; -export type SearchReposToolOutput = InferToolOutput; -export type SearchReposToolUIPart = ToolUIPart<{ [toolNames.searchRepos]: SearchReposTool }>; - -export const listAllReposTool = tool({ - description: `Lists all repositories in the codebase. 
This provides a complete overview of all available repositories.`, - inputSchema: z.object({}), - execute: async () => { - const reposResponse = await getRepos(); +export const listReposTool = tool({ + description: 'Lists repositories in the organization with optional filtering and pagination.', + inputSchema: listReposQueryParamsSchema, + execute: async (request: ListReposQueryParams) => { + const reposResponse = await listRepos(request); if (isServiceError(reposResponse)) { return reposResponse; } - return reposResponse.map((repo) => repo.repoName); + return reposResponse.data.map((repo) => repo.repoName); } }); -export type ListAllReposTool = InferUITool; -export type ListAllReposToolInput = InferToolInput; -export type ListAllReposToolOutput = InferToolOutput; -export type ListAllReposToolUIPart = ToolUIPart<{ [toolNames.listAllRepos]: ListAllReposTool }>; +export type ListReposTool = InferUITool; +export type ListReposToolInput = InferToolInput; +export type ListReposToolOutput = InferToolOutput; +export type ListReposToolUIPart = ToolUIPart<{ [toolNames.listRepos]: ListReposTool }>; diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts index 71ee959a..d6f82602 100644 --- a/packages/web/src/features/chat/types.ts +++ b/packages/web/src/features/chat/types.ts @@ -3,7 +3,7 @@ import { BaseEditor, Descendant } from "slate"; import { HistoryEditor } from "slate-history"; import { ReactEditor, RenderElementProps } from "slate-react"; import { z } from "zod"; -import { FindSymbolDefinitionsTool, FindSymbolReferencesTool, ReadFilesTool, SearchCodeTool, SearchReposTool, ListAllReposTool } from "./tools"; +import { FindSymbolDefinitionsTool, FindSymbolReferencesTool, ReadFilesTool, SearchCodeTool, ListReposTool } from "./tools"; import { toolNames } from "./constants"; import { LanguageModel } from "@sourcebot/schemas/v3/index.type"; @@ -83,8 +83,7 @@ export type SBChatMessageToolTypes = { [toolNames.readFiles]: ReadFilesTool, 
[toolNames.findSymbolReferences]: FindSymbolReferencesTool, [toolNames.findSymbolDefinitions]: FindSymbolDefinitionsTool, - [toolNames.searchRepos]: SearchReposTool, - [toolNames.listAllRepos]: ListAllReposTool, + [toolNames.listRepos]: ListReposTool, } export type SBChatMessageDataParts = { From 6b71879b11fe63d930c7d1085e08e6c9b7cfcc96 Mon Sep 17 00:00:00 2001 From: bkellam Date: Sat, 31 Jan 2026 19:25:03 -0800 Subject: [PATCH 05/13] fix client --- packages/web/src/app/api/(client)/client.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/web/src/app/api/(client)/client.ts b/packages/web/src/app/api/(client)/client.ts index 6d0745e4..7cb2f95c 100644 --- a/packages/web/src/app/api/(client)/client.ts +++ b/packages/web/src/app/api/(client)/client.ts @@ -38,7 +38,7 @@ export const search = async (body: SearchRequest): Promise => { - const url = new URL("/api/source"); + const url = new URL("/api/source", window.location.origin); for (const [key, value] of Object.entries(queryParams)) { url.searchParams.set(key, value.toString()); } @@ -54,7 +54,7 @@ export const getFileSource = async (queryParams: FileSourceRequest): Promise => { - const url = new URL("/api/repos"); + const url = new URL("/api/repos", window.location.origin); for (const [key, value] of Object.entries(queryParams)) { url.searchParams.set(key, value.toString()); } From e6895909caa7bf44b665397b6dba4f33c7069192 Mon Sep 17 00:00:00 2001 From: bkellam Date: Sat, 31 Jan 2026 19:33:44 -0800 Subject: [PATCH 06/13] improve search tool --- .../tools/searchCodeToolComponent.tsx | 25 +-- packages/web/src/features/chat/tools.ts | 107 ++++++++---- packages/web/src/features/chat/utils.test.ts | 163 +----------------- packages/web/src/features/chat/utils.ts | 36 ---- 4 files changed, 72 insertions(+), 259 deletions(-) diff --git a/packages/web/src/features/chat/components/chatThread/tools/searchCodeToolComponent.tsx 
b/packages/web/src/features/chat/components/chatThread/tools/searchCodeToolComponent.tsx index 53287b88..ca266c59 100644 --- a/packages/web/src/features/chat/components/chatThread/tools/searchCodeToolComponent.tsx +++ b/packages/web/src/features/chat/components/chatThread/tools/searchCodeToolComponent.tsx @@ -1,17 +1,12 @@ 'use client'; import { SearchCodeToolUIPart } from "@/features/chat/tools"; -import { createPathWithQueryParams, isServiceError } from "@/lib/utils"; +import { isServiceError } from "@/lib/utils"; import { useMemo, useState } from "react"; import { FileListItem, ToolHeader, TreeList } from "./shared"; import { CodeSnippet } from "@/app/components/codeSnippet"; import { Separator } from "@/components/ui/separator"; import { SearchIcon } from "lucide-react"; -import Link from "next/link"; -import { SearchQueryParams } from "@/lib/types"; -import { PlayIcon } from "@radix-ui/react-icons"; -import { buildSearchQuery } from "@/features/chat/utils"; -import { SINGLE_TENANT_ORG_DOMAIN } from "@/lib/constants"; export const SearchCodeToolComponent = ({ part }: { part: SearchCodeToolUIPart }) => { const [isExpanded, setIsExpanded] = useState(false); @@ -21,14 +16,7 @@ export const SearchCodeToolComponent = ({ part }: { part: SearchCodeToolUIPart } return ''; } - const query = buildSearchQuery({ - query: part.input.queryRegexp, - repoNamesFilterRegexp: part.input.repoNamesFilterRegexp, - languageNamesFilter: part.input.languageNamesFilter, - fileNamesFilterRegexp: part.input.fileNamesFilterRegexp, - }); - - return query; + return part.input.query; }, [part]); const label = useMemo(() => { @@ -76,15 +64,6 @@ export const SearchCodeToolComponent = ({ part }: { part: SearchCodeToolUIPart } })} )} - - - Manually run query - )} diff --git a/packages/web/src/features/chat/tools.ts b/packages/web/src/features/chat/tools.ts index c71fab8e..dac75f96 100644 --- a/packages/web/src/features/chat/tools.ts +++ b/packages/web/src/features/chat/tools.ts @@ -4,11 +4,12 @@ 
import { InferToolInput, InferToolOutput, InferUITool, tool, ToolUIPart } from " import { isServiceError } from "@/lib/utils"; import { FileSourceResponse, getFileSource } from '@/features/git'; import { findSearchBasedSymbolDefinitions, findSearchBasedSymbolReferences } from "../codeNav/api"; -import { addLineNumbers, buildSearchQuery } from "./utils"; +import { addLineNumbers } from "./utils"; import { toolNames } from "./constants"; import { listReposQueryParamsSchema } from "@/lib/schemas"; import { ListReposQueryParams } from "@/lib/types"; import { listRepos } from "@/app/api/(server)/repos/listReposApi"; +import escapeStringRegexp from "escape-string-regexp"; // @NOTE: When adding a new tool, follow these steps: // 1. Add the tool to the `toolNames` constant in `constants.ts`. @@ -114,7 +115,6 @@ export const readFilesTool = tool({ path, repo: repository, ref: revision, - // @todo(mt): handle multi-tenancy. }); })); @@ -138,58 +138,89 @@ export type ReadFilesToolInput = InferToolInput; export type ReadFilesToolOutput = InferToolOutput; export type ReadFilesToolUIPart = ToolUIPart<{ [toolNames.readFiles]: ReadFilesTool }> +const DEFAULT_SEARCH_LIMIT = 100; + export const createCodeSearchTool = (selectedRepos: string[]) => tool({ - description: `Fetches code that matches the provided regex pattern in \`query\`. This is NOT a semantic search. - Results are returned as an array of matching files, with the file's URL, repository, and language.`, + description: `Searches for code that matches the provided search query as a substring by default, or as a regular expression if useRegex is true. Useful for exploring remote repositories by searching for exact symbols, functions, variables, or specific code patterns. To determine if a repository is indexed, use the \`listRepos\` tool. By default, searches are global and will search the default branch of all repositories. 
Searches can be scoped to specific repositories, languages, and branches.`, inputSchema: z.object({ - queryRegexp: z + query: z .string() - .describe(`The regex pattern to search for in the code. - -Queries consist of space-seperated regular expressions. Wrapping expressions in "" combines them. By default, a file must have at least one match for each expression to be included. Examples: - -\`foo\` - Match files with regex /foo/ -\`foo bar\` - Match files with regex /foo/ and /bar/ -\`"foo bar"\` - Match files with regex /foo bar/ -\`console.log\` - Match files with regex /console.log/ - -Multiple expressions can be or'd together with or, negated with -, or grouped with (). Examples: -\`foo or bar\` - Match files with regex /foo/ or /bar/ -\`foo -bar\` - Match files with regex /foo/ but not /bar/ -\`foo (bar or baz)\` - Match files with regex /foo/ and either /bar/ or /baz/ -`), - repoNamesFilterRegexp: z + .describe(`The search pattern to match against code contents. Do not escape quotes in your query.`) + // Escape backslashes first, then quotes, and wrap in double quotes + // so the query is treated as a literal phrase (like grep). + .transform((val) => { + const escaped = val.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); + return `"${escaped}"`; + }), + useRegex: z + .boolean() + .describe(`Whether to use regular expression matching to match the search query against code contents. When false, substring matching is used. (default: false)`) + .optional(), + filterByRepos: z .array(z.string()) - .describe(`Filter results from repos that match the regex. By default all repos are searched.`) + .describe(`Scope the search to the provided repositories.`) .optional(), - languageNamesFilter: z + filterByLanguages: z .array(z.string()) - .describe(`Scope the search to the provided languages. The language MUST be formatted as a GitHub linguist language. 
Examples: Python, JavaScript, TypeScript, Java, C#, C++, PHP, Go, Rust, Ruby, Swift, Kotlin, Shell, C, Dart, HTML, CSS, PowerShell, SQL, R`) + .describe(`Scope the search to the provided languages.`) .optional(), - fileNamesFilterRegexp: z + filterByFilepaths: z .array(z.string()) - .describe(`Filter results from filepaths that match the regex. When this option is not specified, all files are searched.`) + .describe(`Scope the search to the provided filepaths.`) + .optional(), + caseSensitive: z + .boolean() + .describe(`Whether the search should be case sensitive (default: false).`) + .optional(), + ref: z + .string() + .describe(`Commit SHA, branch or tag name to search on. If not provided, defaults to the default branch (usually 'main' or 'master').`) + .optional(), + limit: z + .number() + .default(DEFAULT_SEARCH_LIMIT) + .describe(`Maximum number of matches to return (default: ${DEFAULT_SEARCH_LIMIT})`) .optional(), - limit: z.number().default(10).describe("Maximum number of matches to return (default: 100)"), }), - execute: async ({ queryRegexp: _query, repoNamesFilterRegexp, languageNamesFilter, fileNamesFilterRegexp, limit }) => { - const query = buildSearchQuery({ - query: _query, - repoNamesFilter: selectedRepos, - repoNamesFilterRegexp, - languageNamesFilter, - fileNamesFilterRegexp, - }); + execute: async ({ + query, + useRegex = false, + filterByRepos: repos = [], + filterByLanguages: languages = [], + filterByFilepaths: filepaths = [], + caseSensitive = false, + ref, + limit = DEFAULT_SEARCH_LIMIT, + }) => { + + if (selectedRepos.length > 0) { + query += ` reposet:${selectedRepos.join(',')}`; + } + + if (repos.length > 0) { + query += ` (repo:${repos.map(id => escapeStringRegexp(id)).join(' or repo:')})`; + } + + if (languages.length > 0) { + query += ` (lang:${languages.join(' or lang:')})`; + } + + if (filepaths.length > 0) { + query += ` (file:${filepaths.map(filepath => escapeStringRegexp(filepath)).join(' or file:')})`; + } + + if (ref) { + query 
+= ` (rev:${ref})`; + } const response = await search({ queryType: 'string', query, options: { - matches: limit ?? 100, + matches: limit, contextLines: 3, - whole: false, - isCaseSensitivityEnabled: true, - isRegexEnabled: true, + isCaseSensitivityEnabled: caseSensitive, + isRegexEnabled: useRegex, } }); diff --git a/packages/web/src/features/chat/utils.test.ts b/packages/web/src/features/chat/utils.test.ts index 698fbe42..5c0932eb 100644 --- a/packages/web/src/features/chat/utils.test.ts +++ b/packages/web/src/features/chat/utils.test.ts @@ -1,5 +1,5 @@ import { expect, test, vi } from 'vitest' -import { fileReferenceToString, getAnswerPartFromAssistantMessage, groupMessageIntoSteps, repairReferences, buildSearchQuery } from './utils' +import { fileReferenceToString, getAnswerPartFromAssistantMessage, groupMessageIntoSteps, repairReferences } from './utils' import { FILE_REFERENCE_REGEX, ANSWER_TAG } from './constants'; import { SBChatMessage, SBChatMessagePart } from './types'; @@ -351,164 +351,3 @@ test('repairReferences handles malformed inline code blocks', () => { const expected = 'See @file:{github.com/sourcebot-dev/sourcebot::packages/web/src/auth.ts} for details.'; expect(repairReferences(input)).toBe(expected); }); - -test('buildSearchQuery returns base query when no filters provided', () => { - const result = buildSearchQuery({ - query: 'console.log' - }); - - expect(result).toBe('console.log'); -}); - -test('buildSearchQuery adds repoNamesFilter correctly', () => { - const result = buildSearchQuery({ - query: 'function test', - repoNamesFilter: ['repo1', 'repo2'] - }); - - expect(result).toBe('function test reposet:repo1,repo2'); -}); - -test('buildSearchQuery adds single repoNamesFilter correctly', () => { - const result = buildSearchQuery({ - query: 'function test', - repoNamesFilter: ['myrepo'] - }); - - expect(result).toBe('function test reposet:myrepo'); -}); - -test('buildSearchQuery ignores empty repoNamesFilter', () => { - const result = 
buildSearchQuery({ - query: 'function test', - repoNamesFilter: [] - }); - - expect(result).toBe('function test'); -}); - -test('buildSearchQuery adds languageNamesFilter correctly', () => { - const result = buildSearchQuery({ - query: 'class definition', - languageNamesFilter: ['typescript', 'javascript'] - }); - - expect(result).toBe('class definition ( lang:typescript or lang:javascript )'); -}); - -test('buildSearchQuery adds single languageNamesFilter correctly', () => { - const result = buildSearchQuery({ - query: 'class definition', - languageNamesFilter: ['python'] - }); - - expect(result).toBe('class definition ( lang:python )'); -}); - -test('buildSearchQuery ignores empty languageNamesFilter', () => { - const result = buildSearchQuery({ - query: 'class definition', - languageNamesFilter: [] - }); - - expect(result).toBe('class definition'); -}); - -test('buildSearchQuery adds fileNamesFilterRegexp correctly', () => { - const result = buildSearchQuery({ - query: 'import statement', - fileNamesFilterRegexp: ['*.ts', '*.js'] - }); - - expect(result).toBe('import statement ( file:*.ts or file:*.js )'); -}); - -test('buildSearchQuery adds single fileNamesFilterRegexp correctly', () => { - const result = buildSearchQuery({ - query: 'import statement', - fileNamesFilterRegexp: ['*.tsx'] - }); - - expect(result).toBe('import statement ( file:*.tsx )'); -}); - -test('buildSearchQuery ignores empty fileNamesFilterRegexp', () => { - const result = buildSearchQuery({ - query: 'import statement', - fileNamesFilterRegexp: [] - }); - - expect(result).toBe('import statement'); -}); - -test('buildSearchQuery adds repoNamesFilterRegexp correctly', () => { - const result = buildSearchQuery({ - query: 'bug fix', - repoNamesFilterRegexp: ['org/repo1', 'org/repo2'] - }); - - expect(result).toBe('bug fix ( repo:org/repo1 or repo:org/repo2 )'); -}); - -test('buildSearchQuery adds single repoNamesFilterRegexp correctly', () => { - const result = buildSearchQuery({ - query: 'bug 
fix', - repoNamesFilterRegexp: ['myorg/myrepo'] - }); - - expect(result).toBe('bug fix ( repo:myorg/myrepo )'); -}); - -test('buildSearchQuery ignores empty repoNamesFilterRegexp', () => { - const result = buildSearchQuery({ - query: 'bug fix', - repoNamesFilterRegexp: [] - }); - - expect(result).toBe('bug fix'); -}); - -test('buildSearchQuery combines multiple filters correctly', () => { - const result = buildSearchQuery({ - query: 'authentication', - repoNamesFilter: ['backend', 'frontend'], - languageNamesFilter: ['typescript', 'javascript'], - fileNamesFilterRegexp: ['*.ts', '*.js'], - repoNamesFilterRegexp: ['org/auth-*'] - }); - - expect(result).toBe( - 'authentication reposet:backend,frontend ( lang:typescript or lang:javascript ) ( file:*.ts or file:*.js ) ( repo:org/auth-* )' - ); -}); - -test('buildSearchQuery handles mixed empty and non-empty filters', () => { - const result = buildSearchQuery({ - query: 'error handling', - repoNamesFilter: [], - languageNamesFilter: ['python'], - fileNamesFilterRegexp: [], - repoNamesFilterRegexp: ['error/*'] - }); - - expect(result).toBe('error handling ( lang:python ) ( repo:error/* )'); -}); - -test('buildSearchQuery handles empty base query', () => { - const result = buildSearchQuery({ - query: '', - repoNamesFilter: ['repo1'], - languageNamesFilter: ['typescript'] - }); - - expect(result).toBe(' reposet:repo1 ( lang:typescript )'); -}); - -test('buildSearchQuery handles query with special characters', () => { - const result = buildSearchQuery({ - query: 'console.log("hello world")', - repoNamesFilter: ['test-repo'] - }); - - expect(result).toBe('console.log("hello world") reposet:test-repo'); -}); \ No newline at end of file diff --git a/packages/web/src/features/chat/utils.ts b/packages/web/src/features/chat/utils.ts index c64f1ed3..afc00ac8 100644 --- a/packages/web/src/features/chat/utils.ts +++ b/packages/web/src/features/chat/utils.ts @@ -332,42 +332,6 @@ export const getAnswerPartFromAssistantMessage = 
(message: SBChatMessage, isStre return undefined; } -export const buildSearchQuery = (options: { - query: string, - repoNamesFilter?: string[], - repoNamesFilterRegexp?: string[], - languageNamesFilter?: string[], - fileNamesFilterRegexp?: string[], -}) => { - const { - query: _query, - repoNamesFilter, - repoNamesFilterRegexp, - languageNamesFilter, - fileNamesFilterRegexp, - } = options; - - let query = `${_query}`; - - if (repoNamesFilter && repoNamesFilter.length > 0) { - query += ` reposet:${repoNamesFilter.join(',')}`; - } - - if (languageNamesFilter && languageNamesFilter.length > 0) { - query += ` ( lang:${languageNamesFilter.join(' or lang:')} )`; - } - - if (fileNamesFilterRegexp && fileNamesFilterRegexp.length > 0) { - query += ` ( file:${fileNamesFilterRegexp.join(' or file:')} )`; - } - - if (repoNamesFilterRegexp && repoNamesFilterRegexp.length > 0) { - query += ` ( repo:${repoNamesFilterRegexp.join(' or repo:')} )`; - } - - return query; -} - /** * Generates a unique key given a LanguageModelInfo object. 
*/ From f04dd335cf3495848d54516ef945cf7f4c265092 Mon Sep 17 00:00:00 2001 From: bkellam Date: Sat, 31 Jan 2026 21:22:04 -0800 Subject: [PATCH 07/13] add apiHandler to chat/blocking --- packages/web/src/app/api/(server)/chat/blocking/route.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/web/src/app/api/(server)/chat/blocking/route.ts b/packages/web/src/app/api/(server)/chat/blocking/route.ts index 4c24cb19..65766a8c 100644 --- a/packages/web/src/app/api/(server)/chat/blocking/route.ts +++ b/packages/web/src/app/api/(server)/chat/blocking/route.ts @@ -12,10 +12,11 @@ import { createLogger } from "@sourcebot/shared"; import { randomUUID } from "crypto"; import { StatusCodes } from "http-status-codes"; import { headers } from "next/headers"; -import { NextResponse } from "next/server"; +import { NextRequest, NextResponse } from "next/server"; import { z } from "zod"; import { createMessageStream } from "../route"; import { InferUIMessageChunk, UITools, UIDataTypes, UIMessage } from "ai"; +import { apiHandler } from "@/lib/apiHandler"; const logger = createLogger('chat-blocking-api'); @@ -51,7 +52,7 @@ interface BlockingChatResponse { * The chat session is persisted to the database, allowing users to view the full * conversation (including tool calls and reasoning) in the web UI. 
*/ -export async function POST(request: Request) { +export const POST = apiHandler(async (request: NextRequest) => { const requestBody = await request.json(); const parsed = await blockingChatRequestSchema.safeParseAsync(requestBody); @@ -193,7 +194,7 @@ export async function POST(request: Request) { } return NextResponse.json(response); -} +}); const blockStreamUntilFinish = async >(stream: ReadableStream>) => { const reader = stream.getReader(); From 2654454812b9259804fe4b932e93c10d4e5097fc Mon Sep 17 00:00:00 2001 From: bkellam Date: Sat, 31 Jan 2026 21:26:21 -0800 Subject: [PATCH 08/13] feedback --- packages/web/src/app/api/(server)/chat/blocking/route.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/packages/web/src/app/api/(server)/chat/blocking/route.ts b/packages/web/src/app/api/(server)/chat/blocking/route.ts index 65766a8c..89c2b854 100644 --- a/packages/web/src/app/api/(server)/chat/blocking/route.ts +++ b/packages/web/src/app/api/(server)/chat/blocking/route.ts @@ -105,8 +105,7 @@ export const POST = apiHandler(async (request: NextRequest) => { parts: [{ type: 'text', text: query }], }; - const selectedSearchScopes: SearchScope[] = []; - for (const repo of repos) { + const selectedSearchScopes = await Promise.all(repos.map(async (repo) => { const repoDB = await prisma.repo.findFirst({ where: { name: repo, @@ -121,13 +120,13 @@ export const POST = apiHandler(async (request: NextRequest) => { }) } - selectedSearchScopes.push({ + return { type: 'repo', value: repoDB.name, name: repoDB.displayName ?? repoDB.name.split('/').pop() ?? 
repoDB.name, codeHostType: repoDB.external_codeHostType, - }) - } + } satisfies SearchScope; + })); // We'll capture the final messages and usage from the stream let finalMessages: SBChatMessage[] = []; From 1561c72f7dca2deb911dc60e1568aef87ed3726f Mon Sep 17 00:00:00 2001 From: bkellam Date: Sat, 31 Jan 2026 21:31:57 -0800 Subject: [PATCH 09/13] Add wa_chat_message_sent event to ask --- .../src/features/chat/components/chatThread/chatThread.tsx | 7 +++++++ packages/web/src/lib/posthogEvents.ts | 3 +++ 2 files changed, 10 insertions(+) diff --git a/packages/web/src/features/chat/components/chatThread/chatThread.tsx b/packages/web/src/features/chat/components/chatThread/chatThread.tsx index b6c7cbf8..2972ea53 100644 --- a/packages/web/src/features/chat/components/chatThread/chatThread.tsx +++ b/packages/web/src/features/chat/components/chatThread/chatThread.tsx @@ -25,6 +25,7 @@ import { RepositoryQuery, SearchContextQuery } from '@/lib/types'; import { generateAndUpdateChatNameFromMessage } from '../../actions'; import { isServiceError } from '@/lib/utils'; import { NotConfiguredErrorBanner } from '../notConfiguredErrorBanner'; +import useCaptureEvent from '@/hooks/useCaptureEvent'; type ChatHistoryState = { scrollOffset?: number; @@ -61,6 +62,7 @@ export const ChatThread = ({ const { toast } = useToast(); const router = useRouter(); const [isContextSelectorOpen, setIsContextSelectorOpen] = useState(false); + const captureEvent = useCaptureEvent(); // Initial state is from attachments that exist in in the chat history. 
const [sources, setSources] = useState( @@ -118,6 +120,10 @@ export const ChatThread = ({ } satisfies AdditionalChatRequestParams, }); + captureEvent('wa_chat_message_sent', { + messageCount: messages.length + 1, + }); + if ( messages.length === 0 && message.parts.length > 0 && @@ -148,6 +154,7 @@ export const ChatThread = ({ toast, chatId, router, + captureEvent, ]); diff --git a/packages/web/src/lib/posthogEvents.ts b/packages/web/src/lib/posthogEvents.ts index 74e0d647..c264316f 100644 --- a/packages/web/src/lib/posthogEvents.ts +++ b/packages/web/src/lib/posthogEvents.ts @@ -151,6 +151,9 @@ export type PosthogEventMap = { messageId: string, }, wa_chat_thread_created: {}, + wa_chat_message_sent: { + messageCount: number, + }, ////////////////////////////////////////////////////////////////// wa_demo_docs_link_pressed: {}, wa_demo_search_example_card_pressed: { From e8f9d29bfdaca74d7c9b30a1a9e16127359cd646 Mon Sep 17 00:00:00 2001 From: bkellam Date: Sat, 31 Jan 2026 21:35:49 -0800 Subject: [PATCH 10/13] add wa_chat_tool_used event --- packages/web/src/features/chat/agent.ts | 6 ++++++ packages/web/src/lib/posthogEvents.ts | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/packages/web/src/features/chat/agent.ts b/packages/web/src/features/chat/agent.ts index cbd6a54f..f77b60b5 100644 --- a/packages/web/src/features/chat/agent.ts +++ b/packages/web/src/features/chat/agent.ts @@ -1,5 +1,6 @@ import { getFileSource } from '@/features/git'; import { isServiceError } from "@/lib/utils"; +import { captureEvent } from "@/lib/posthog"; import { ProviderOptions } from "@ai-sdk/provider-utils"; import { createLogger, env } from "@sourcebot/shared"; import { env as clientEnv } from "@sourcebot/shared/client"; @@ -82,6 +83,11 @@ export const createAgentStream = async ({ toolChoice: "auto", onStepFinish: ({ toolResults }) => { toolResults.forEach(({ toolName, output, dynamic }) => { + captureEvent('wa_chat_tool_used', { + toolName, + success: !isServiceError(output), + 
}); + if (dynamic || isServiceError(output)) { return; } diff --git a/packages/web/src/lib/posthogEvents.ts b/packages/web/src/lib/posthogEvents.ts index c264316f..cec5b5b1 100644 --- a/packages/web/src/lib/posthogEvents.ts +++ b/packages/web/src/lib/posthogEvents.ts @@ -154,6 +154,10 @@ export type PosthogEventMap = { wa_chat_message_sent: { messageCount: number, }, + wa_chat_tool_used: { + toolName: string, + success: boolean, + }, ////////////////////////////////////////////////////////////////// wa_demo_docs_link_pressed: {}, wa_demo_search_example_card_pressed: { From 5b898968ecae4c27d1e4076bbb19a4f15ca0a2af Mon Sep 17 00:00:00 2001 From: bkellam Date: Sat, 31 Jan 2026 21:42:03 -0800 Subject: [PATCH 11/13] feedback --- packages/mcp/src/client.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index 70961ec9..dfdc225d 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -120,6 +120,7 @@ export const askCodebase = async (request: AskCodebaseRequest): Promise Date: Sat, 31 Jan 2026 21:58:00 -0800 Subject: [PATCH 12/13] docs --- docs/docs/features/mcp-server.mdx | 74 ++++++++++++++++++++++++------- packages/mcp/README.md | 14 ++++++ 2 files changed, 72 insertions(+), 16 deletions(-) diff --git a/docs/docs/features/mcp-server.mdx b/docs/docs/features/mcp-server.mdx index 8390dfde..d993a386 100644 --- a/docs/docs/features/mcp-server.mdx +++ b/docs/docs/features/mcp-server.mdx @@ -147,32 +147,74 @@ The [Model Context Protocol](https://modelcontextprotocol.io/introduction) (MCP) ### `search_code` -Fetches code that matches the provided regex pattern in `query`. +Searches for code that matches the provided search query as a substring by default, or as a regular expression if `useRegex` is true. 
Parameters: -| Name | Required | Description | -|:----------------------|:---------|:----------------------------------------------------------------------------------------------------------------------------------| -| `query` | yes | Regex pattern to search for. Escape special characters and spaces with a single backslash (e.g., 'console\.log', 'console\ log'). | -| `filterByRepoIds` | no | Restrict search to specific repository IDs (from 'list_repos'). Leave empty to search all. | -| `filterByLanguages` | no | Restrict search to specific languages (GitHub linguist format, e.g., Python, JavaScript). | -| `caseSensitive` | no | Case sensitive search (default: false). | -| `includeCodeSnippets` | no | Include code snippets in results (default: false). | -| `maxTokens` | no | Max tokens to return (default: env.DEFAULT_MINIMUM_TOKENS). | +| Name | Required | Description | +|:----------------------|:---------|:---------------------------------------------------------------------------------------------------------------------| +| `query` | yes | The search pattern to match against code contents. Do not escape quotes in your query. | +| `useRegex` | no | Whether to use regular expression matching. When false, substring matching is used (default: false). | +| `filterByRepos` | no | Scope the search to specific repositories. | +| `filterByLanguages` | no | Scope the search to specific languages. | +| `filterByFilepaths` | no | Scope the search to specific filepaths. | +| `caseSensitive` | no | Whether the search should be case sensitive (default: false). | +| `includeCodeSnippets` | no | Whether to include code snippets in the response (default: false). | +| `ref` | no | Commit SHA, branch or tag name to search on. If not provided, defaults to the default branch. | +| `maxTokens` | no | The maximum number of tokens to return (default: 10000). | ### `list_repos` -Lists all repositories indexed by Sourcebot. 
+Lists repositories indexed by Sourcebot with optional filtering and pagination. -### `get_file_source` +Parameters: +| Name | Required | Description | +|:------------|:---------|:--------------------------------------------------------------------------------| +| `query` | no | Filter repositories by name (case-insensitive). | +| `page` | no | Page number for pagination (min 1, default: 1). | +| `perPage` | no | Results per page for pagination (min 1, max 100, default: 30). | +| `sort` | no | Sort repositories by 'name' or 'pushed' (most recent commit). Default: 'name'. | +| `direction` | no | Sort direction: 'asc' or 'desc' (default: 'asc'). | + + +### `read_file` + +Reads the source code for a given file. + +Parameters: +| Name | Required | Description | +|:-------|:---------|:-------------------------------------------------------------------------------------------------------| +| `repo` | yes | The repository name. | +| `path` | yes | The path to the file. | +| `ref` | no | Commit SHA, branch or tag name to fetch the source code for. If not provided, uses the default branch. | + + +### `list_commits` + +Get a list of commits for a given repository. + +Parameters: +| Name | Required | Description | +|:----------|:---------|:-----------------------------------------------------------------------------------------------------------------------| +| `repo` | yes | The name of the repository to list commits for. | +| `query` | no | Search query to filter commits by message content (case-insensitive). | +| `since` | no | Show commits more recent than this date. Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago'). | +| `until` | no | Show commits older than this date. Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday'). | +| `author` | no | Filter commits by author name or email (case-insensitive). | +| `ref` | no | Commit SHA, branch or tag name to list commits of. If not provided, uses the default branch. 
| +| `page` | no | Page number for pagination (min 1, default: 1). | +| `perPage` | no | Results per page for pagination (min 1, max 100, default: 50). | + + +### `ask_codebase` -Fetches the source code for a given file. +Ask a natural language question about the codebase. This tool uses an AI agent to autonomously search code, read files, and find symbol references/definitions to answer your question. Returns a detailed answer in markdown format with code references, plus a link to view the full research session in the Sourcebot web UI. Parameters: -| Name | Required | Description | -|:-------------|:---------|:-----------------------------------------------------------------| -| `fileName` | yes | The file to fetch the source code for. | -| `repoId` | yes | The Sourcebot repository ID. | +| Name | Required | Description | +|:--------|:---------|:------------------------------------------------------------------------------------------------------------------| +| `query` | yes | The query to ask about the codebase. | +| `repos` | no | The repositories that are accessible to the agent during the chat. If not provided, all repositories are accessible. | ## Environment Variables diff --git a/packages/mcp/README.md b/packages/mcp/README.md index cfa6d949..5139ebbf 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -234,6 +234,20 @@ Get a list of commits for a given repository. +### ask_codebase + +Ask a natural language question about the codebase. This tool uses an AI agent to autonomously search code, read files, and find symbol references/definitions to answer your question. Returns a detailed answer in markdown format with code references, plus a link to view the full research session in the Sourcebot web UI. + +
+Parameters + +| Name | Required | Description | +|:--------|:---------|:------------------------------------------------------------------------------------------------------------------| +| `query` | yes | The query to ask about the codebase. | +| `repos` | no | The repositories that are accessible to the agent during the chat. If not provided, all repositories are accessible. | + +
+ ## Supported Code Hosts Sourcebot supports the following code hosts: From c3cf085aa2ba83abbba10e3031357d1c515408ea Mon Sep 17 00:00:00 2001 From: bkellam Date: Sat, 31 Jan 2026 22:27:20 -0800 Subject: [PATCH 13/13] add mechanism for specifying language model explicitly --- docs/docs/features/mcp-server.mdx | 18 +++++++--- packages/mcp/README.md | 20 ++++++++--- packages/mcp/src/client.ts | 24 +++++++++++-- packages/mcp/src/index.ts | 19 +++++++++- packages/mcp/src/schemas.ts | 17 +++++++++ packages/mcp/src/types.ts | 5 +++ .../app/api/(server)/chat/blocking/route.ts | 35 +++++++++++++++---- .../web/src/app/api/(server)/chat/route.ts | 1 - .../web/src/app/api/(server)/models/route.ts | 24 +++++++++++++ 9 files changed, 143 insertions(+), 20 deletions(-) create mode 100644 packages/web/src/app/api/(server)/models/route.ts diff --git a/docs/docs/features/mcp-server.mdx b/docs/docs/features/mcp-server.mdx index d993a386..a21c4ccc 100644 --- a/docs/docs/features/mcp-server.mdx +++ b/docs/docs/features/mcp-server.mdx @@ -206,15 +206,25 @@ Parameters: | `perPage` | no | Results per page for pagination (min 1, max 100, default: 50). | +### `list_language_models` + +Lists the available language models configured on the Sourcebot instance. Use this to discover which models can be specified when calling `ask_codebase`. + +Parameters: + +This tool takes no parameters. + + ### `ask_codebase` Ask a natural language question about the codebase. This tool uses an AI agent to autonomously search code, read files, and find symbol references/definitions to answer your question. Returns a detailed answer in markdown format with code references, plus a link to view the full research session in the Sourcebot web UI. Parameters: -| Name | Required | Description | -|:--------|:---------|:------------------------------------------------------------------------------------------------------------------| -| `query` | yes | The query to ask about the codebase. 
| +| `repos` | no | The repositories that are accessible to the agent during the chat. If not provided, all repositories are accessible. | +| `languageModel` | no | The language model to use for answering the question. Object with `provider` and `model`. If not provided, defaults to the first model in the config. Use `list_language_models` to see available options. | ## Environment Variables diff --git a/packages/mcp/README.md b/packages/mcp/README.md index 5139ebbf..537eb92f 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -234,6 +234,17 @@ Get a list of commits for a given repository. +### list_language_models + +Lists the available language models configured on the Sourcebot instance. Use this to discover which models can be specified when calling `ask_codebase`. + +<details>
+<summary>Parameters</summary> + +This tool takes no parameters. + +</details>
+ ### ask_codebase Ask a natural language question about the codebase. This tool uses an AI agent to autonomously search code, read files, and find symbol references/definitions to answer your question. Returns a detailed answer in markdown format with code references, plus a link to view the full research session in the Sourcebot web UI. @@ -241,10 +252,11 @@ Ask a natural language question about the codebase. This tool uses an AI agent t
Parameters -| Name | Required | Description | -|:--------|:---------|:------------------------------------------------------------------------------------------------------------------| -| `query` | yes | The query to ask about the codebase. | -| `repos` | no | The repositories that are accessible to the agent during the chat. If not provided, all repositories are accessible. | +| Name | Required | Description | +|:----------------|:---------|:-----------------------------------------------------------------------------------------------------------------------------------------------| +| `query` | yes | The query to ask about the codebase. | +| `repos` | no | The repositories that are accessible to the agent during the chat. If not provided, all repositories are accessible. | +| `languageModel` | no | The language model to use for answering the question. Object with `provider` and `model`. If not provided, defaults to the first model in the config. Use `list_language_models` to see available options. |
diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index dfdc225d..896692a5 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -1,6 +1,6 @@ import { env } from './env.js'; -import { listReposResponseSchema, searchResponseSchema, fileSourceResponseSchema, listCommitsResponseSchema, askCodebaseResponseSchema } from './schemas.js'; -import { AskCodebaseRequest, AskCodebaseResponse, FileSourceRequest, ListReposQueryParams, SearchRequest, ListCommitsQueryParamsSchema } from './types.js'; +import { listReposResponseSchema, searchResponseSchema, fileSourceResponseSchema, listCommitsResponseSchema, askCodebaseResponseSchema, listLanguageModelsResponseSchema } from './schemas.js'; +import { AskCodebaseRequest, AskCodebaseResponse, FileSourceRequest, ListReposQueryParams, SearchRequest, ListCommitsQueryParamsSchema, ListLanguageModelsResponse } from './types.js'; import { isServiceError, ServiceErrorException } from './utils.js'; import { z } from 'zod'; @@ -111,7 +111,7 @@ export const listCommits = async (queryParams: ListCommitsQueryParamsSchema) => /** * Asks a natural language question about the codebase using the Sourcebot AI agent. * This is a blocking call that runs the full agent loop and returns when complete. - * + * * @param request - The question and optional repo filters * @returns The agent's answer, chat URL, sources, and metadata */ @@ -128,3 +128,21 @@ export const askCodebase = async (request: AskCodebaseRequest): Promise => { + const response = await fetch(`${env.SOURCEBOT_HOST}/api/models`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + 'X-Sourcebot-Client-Source': 'mcp', + ...(env.SOURCEBOT_API_KEY ? 
{ 'X-Sourcebot-Api-Key': env.SOURCEBOT_API_KEY } : {}) + }, + }); + + return parseResponse(response, listLanguageModelsResponseSchema); +} diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 5b96238d..68e8e8e9 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -6,7 +6,7 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js' import _dedent from "dedent"; import escapeStringRegexp from 'escape-string-regexp'; import { z } from 'zod'; -import { askCodebase, getFileSource, listCommits, listRepos, search } from './client.js'; +import { askCodebase, getFileSource, listCommits, listLanguageModels, listRepos, search } from './client.js'; import { env, numberSchema } from './env.js'; import { askCodebaseRequestSchema, fileSourceRequestSchema, listCommitsQueryParamsSchema, listReposQueryParamsSchema } from './schemas.js'; import { AskCodebaseRequest, FileSourceRequest, ListCommitsQueryParamsSchema, ListReposQueryParams, TextContent } from './types.js'; @@ -238,6 +238,22 @@ server.tool( } ); +server.tool( + "list_language_models", + dedent`Lists the available language models configured on the Sourcebot instance. 
Use this to discover which models can be specified when calling ask_codebase.`, + {}, + async () => { + const models = await listLanguageModels(); + + return { + content: [{ + type: "text", + text: JSON.stringify(models), + }], + }; + } +); + server.tool( "ask_codebase", dedent` @@ -262,6 +278,7 @@ server.tool( --- **View full research session:** ${response.chatUrl} + **Model used:** ${response.languageModel.model} `; return { diff --git a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index a2b49262..41a9fd45 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -273,6 +273,14 @@ export const listCommitsResponseSchema = z.array(z.object({ author_email: z.string(), })); +export const languageModelInfoSchema = z.object({ + provider: z.string().describe("The model provider (e.g., 'anthropic', 'openai')"), + model: z.string().describe("The model ID"), + displayName: z.string().optional().describe("Optional display name for the model"), +}); + +export const listLanguageModelsResponseSchema = z.array(languageModelInfoSchema); + export const askCodebaseRequestSchema = z.object({ query: z .string() @@ -281,6 +289,10 @@ export const askCodebaseRequestSchema = z.object({ .array(z.string()) .optional() .describe("The repositories that are accessible to the agent during the chat. If not provided, all repositories are accessible."), + languageModel: languageModelInfoSchema + .omit({ displayName: true }) + .optional() + .describe("The language model to use for answering the question. If not provided, defaults to the first model in the config. 
Use list_language_models to see available options."), }); export const sourceSchema = z.object({ @@ -296,4 +308,9 @@ export const askCodebaseResponseSchema = z.object({ answer: z.string().describe("The agent's final answer in markdown format"), chatId: z.string().describe("ID of the persisted chat session"), chatUrl: z.string().describe("URL to view the chat in the web UI"), + languageModel: z.object({ + provider: z.string().describe("The model provider (e.g., 'anthropic', 'openai')"), + model: z.string().describe("The model ID"), + displayName: z.string().optional().describe("Optional display name for the model"), + }).describe("The language model used to generate the response"), }); diff --git a/packages/mcp/src/types.ts b/packages/mcp/src/types.ts index 8a721970..55266ba1 100644 --- a/packages/mcp/src/types.ts +++ b/packages/mcp/src/types.ts @@ -14,6 +14,8 @@ import { listCommitsResponseSchema, askCodebaseRequestSchema, askCodebaseResponseSchema, + languageModelInfoSchema, + listLanguageModelsResponseSchema, } from "./schemas.js"; import { z } from "zod"; @@ -39,3 +41,6 @@ export type ListCommitsResponse = z.infer<typeof listCommitsResponseSchema>; export type AskCodebaseRequest = z.infer<typeof askCodebaseRequestSchema>; export type AskCodebaseResponse = z.infer<typeof askCodebaseResponseSchema>; + +export type LanguageModelInfo = z.infer<typeof languageModelInfoSchema>; +export type ListLanguageModelsResponse = z.infer<typeof listLanguageModelsResponseSchema>; diff --git a/packages/web/src/app/api/(server)/chat/blocking/route.ts b/packages/web/src/app/api/(server)/chat/blocking/route.ts index 89c2b854..f1073fa3 100644 --- a/packages/web/src/app/api/(server)/chat/blocking/route.ts +++ b/packages/web/src/app/api/(server)/chat/blocking/route.ts @@ -1,7 +1,7 @@ import { sew } from "@/actions"; import { _getConfiguredLanguageModelsFull, _getAISDKLanguageModelAndOptions, updateChatMessages, generateAndUpdateChatNameFromMessage } from "@/features/chat/actions"; -import { SBChatMessage, SearchScope } from "@/features/chat/types"; -import { convertLLMOutputToPortableMarkdown, getAnswerPartFromAssistantMessage } from
"@/features/chat/utils"; +import { LanguageModelInfo, languageModelInfoSchema, SBChatMessage, SearchScope } from "@/features/chat/types"; +import { convertLLMOutputToPortableMarkdown, getAnswerPartFromAssistantMessage, getLanguageModelKey } from "@/features/chat/utils"; import { ErrorCode } from "@/lib/errorCodes"; import { requestBodySchemaValidationError, ServiceError, ServiceErrorException, serviceErrorResponse } from "@/lib/serviceError"; import { isServiceError } from "@/lib/utils"; @@ -32,6 +32,9 @@ const blockingChatRequestSchema = z.object({ .array(z.string()) .optional() .describe("The repositories that are accessible to the agent during the chat. If not provided, all repositories are accessible."), + languageModel: languageModelInfoSchema + .optional() + .describe("The language model to use for the chat. If not provided, the first configured model is used."), }); /** @@ -41,6 +44,7 @@ interface BlockingChatResponse { answer: string; chatId: string; chatUrl: string; + languageModel: LanguageModelInfo; } /** @@ -60,7 +64,7 @@ export const POST = apiHandler(async (request: NextRequest) => { return serviceErrorResponse(requestBodySchemaValidationError(parsed.error)); } - const { query, repos = [] } = parsed.data; + const { query, repos = [], languageModel: requestedLanguageModel } = parsed.data; const response: BlockingChatResponse | ServiceError = await sew(() => withOptionalAuthV2(async ({ org, user, prisma }) => { @@ -70,13 +74,25 @@ export const POST = apiHandler(async (request: NextRequest) => { return { statusCode: StatusCodes.BAD_REQUEST, errorCode: ErrorCode.INVALID_REQUEST_BODY, - message: "No language models are configured. Please configure at least one language model.", + message: "No language models are configured. Please configure at least one language model. 
See: https://docs.sourcebot.dev/docs/configuration/language-model-providers", } satisfies ServiceError; } - // @todo: we should probably have a option of passing the language model - // into the request body. For now, just use the first configured model. - const languageModelConfig = configuredModels[0]; + // Use the requested language model if provided, otherwise default to the first configured model + let languageModelConfig = configuredModels[0]; + if (requestedLanguageModel) { + const matchingModel = configuredModels.find( + (m) => getLanguageModelKey(m) === getLanguageModelKey(requestedLanguageModel as LanguageModelInfo) + ); + if (!matchingModel) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: `Language model '${requestedLanguageModel.provider}/${requestedLanguageModel.model}' is not configured.`, + } satisfies ServiceError; + } + languageModelConfig = matchingModel; + } const { model, providerOptions } = await _getAISDKLanguageModelAndOptions(languageModelConfig); const modelName = languageModelConfig.displayName ?? 
languageModelConfig.model; @@ -184,6 +200,11 @@ export const POST = apiHandler(async (request: NextRequest) => { answer: portableAnswer, chatId: chat.id, chatUrl, + languageModel: { + provider: languageModelConfig.provider, + model: languageModelConfig.model, + displayName: languageModelConfig.displayName, + }, } satisfies BlockingChatResponse; }) ); diff --git a/packages/web/src/app/api/(server)/chat/route.ts b/packages/web/src/app/api/(server)/chat/route.ts index 0ac70390..057646ae 100644 --- a/packages/web/src/app/api/(server)/chat/route.ts +++ b/packages/web/src/app/api/(server)/chat/route.ts @@ -30,7 +30,6 @@ import { z } from "zod"; const logger = createLogger('chat-api'); const chatRequestSchema = z.object({ - // These paramt messages: z.array(z.any()), id: z.string(), ...additionalChatRequestParamsSchema.shape, diff --git a/packages/web/src/app/api/(server)/models/route.ts b/packages/web/src/app/api/(server)/models/route.ts new file mode 100644 index 00000000..0970ab07 --- /dev/null +++ b/packages/web/src/app/api/(server)/models/route.ts @@ -0,0 +1,24 @@ +import { sew } from "@/actions"; +import { apiHandler } from "@/lib/apiHandler"; +import { getConfiguredLanguageModelsInfo } from "@/features/chat/actions"; +import { serviceErrorResponse } from "@/lib/serviceError"; +import { isServiceError } from "@/lib/utils"; +import { withOptionalAuthV2 } from "@/withAuthV2"; + +export const GET = apiHandler(async () => { + const response = await sew(() => + withOptionalAuthV2(async () => { + const models = await getConfiguredLanguageModelsInfo(); + return models; + }) + ); + + if (isServiceError(response)) { + return serviceErrorResponse(response); + } + + return new Response(JSON.stringify(response), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }); +});