From 15bb302eea1cac255033ab99cdbeb86b63483c49 Mon Sep 17 00:00:00 2001 From: thomasleveil Date: Mon, 16 Mar 2026 01:25:55 +0100 Subject: [PATCH 01/15] feat(query-language): add SelectExpr to grammar and token list Introduces SelectExpr as a new prefix expression (an alternative of PrefixExpr) in query.grammar: SelectExpr { selectKw value } The value is parsed with the generic `value` rule (word or quoted string); no dedicated selectValue rule is added, so validating the selected mode is left to consumers of the parse tree. Also adds "select:" to the PREFIXES list in tokens.ts so it is not tokenised as a plain word by the lezer lexer. The generated parser (parser.ts / parser.terms.ts) must be rebuilt after this change via `yarn build` in packages/queryLanguage. --- packages/queryLanguage/src/query.grammar | 8 ++++++-- packages/queryLanguage/src/tokens.ts | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/queryLanguage/src/query.grammar b/packages/queryLanguage/src/query.grammar index 5cff3c685..0372bedd7 100644 --- a/packages/queryLanguage/src/query.grammar +++ b/packages/queryLanguage/src/query.grammar @@ -47,7 +47,8 @@ PrefixExpr { RepoExpr | LangExpr | SymExpr | - RepoSetExpr + RepoSetExpr | + SelectExpr } RevisionExpr { revisionKw value } @@ -59,6 +60,8 @@ LangExpr { langKw value } SymExpr { symKw value } RepoSetExpr { reposetKw value } +SelectExpr { selectKw value } + // Modifiers ArchivedExpr { archivedKw archivedValue } ForkExpr { forkKw forkValue } @@ -87,6 +90,7 @@ value { quotedString | word } langKw { "lang:" } symKw { "sym:" } reposetKw { "reposet:" } + selectKw { "select:" } // 'or' is now handled by external orToken tokenizer @@ -100,6 +104,6 @@ value { quotedString | word } quotedString, archivedKw, revisionKw, contentKw, contextKw, fileKw, forkKw, visibilityKw, repoKw, langKw, - symKw, reposetKw + symKw, reposetKw, selectKw } } \ No newline at end of file diff --git a/packages/queryLanguage/src/tokens.ts b/packages/queryLanguage/src/tokens.ts index 4fbd656c1..124942f1f 100644 --- a/packages/queryLanguage/src/tokens.ts +++ b/packages/queryLanguage/src/tokens.ts @@ -25,6 +25,7 @@ const PREFIXES = [ 
"lang:", "sym:", "reposet:", + "select:", ]; function isWhitespace(ch: number): boolean { From 27e10e39db7a8fd221c14d0a5f8398e02daa8d37 Mon Sep 17 00:00:00 2001 From: thomasleveil Date: Mon, 16 Mar 2026 01:26:07 +0100 Subject: [PATCH 02/15] feat(search/parser): extract SelectMode from parsed query tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds SelectMode ('repo' | null) and extractSelectMode() which walks the lezer parse tree looking for a SelectExpr node. parseQuerySyntaxIntoIR() now returns { ir, selectMode } so callers can act on the projection modifier without touching the zoekt IR. SelectExpr maps to { const: true } in the IR — a no-op for zoekt — so the modifier is transparent to the search engine and only processed client-side. Also adds: - RepoResult type (repositoryId, repository, repositoryInfo?, matchCount) exported from @/features/search - select: entry in useRefineModeSuggestions so it surfaces in the refine dropdown alongside the type it represents --- .../searchBar/useRefineModeSuggestions.ts | 5 +++ packages/web/src/features/search/index.ts | 1 + packages/web/src/features/search/parser.ts | 39 ++++++++++++++++++- packages/web/src/features/search/types.ts | 11 ++++++ 4 files changed, 54 insertions(+), 2 deletions(-) diff --git a/packages/web/src/app/[domain]/components/searchBar/useRefineModeSuggestions.ts b/packages/web/src/app/[domain]/components/searchBar/useRefineModeSuggestions.ts index 1c8df2a89..6e51d982a 100644 --- a/packages/web/src/app/[domain]/components/searchBar/useRefineModeSuggestions.ts +++ b/packages/web/src/app/[domain]/components/searchBar/useRefineModeSuggestions.ts @@ -25,6 +25,11 @@ export const useRefineModeSuggestions = () => { description: "Exclude results from the given search context." 
}, ] : []), + { + value: SearchPrefix.select, + description: "Project results — return matching repositories instead of individual files.", + spotlight: true, + }, { value: SearchPrefix.visibility, description: "Filter on repository visibility." diff --git a/packages/web/src/features/search/index.ts b/packages/web/src/features/search/index.ts index 665e070eb..ef860d459 100644 --- a/packages/web/src/features/search/index.ts +++ b/packages/web/src/features/search/index.ts @@ -12,4 +12,5 @@ export type { StreamedSearchResponse, SearchResultChunk, SearchResponse, + RepoResult, } from './types'; \ No newline at end of file diff --git a/packages/web/src/features/search/parser.ts b/packages/web/src/features/search/parser.ts index b0e1e09af..9e365854f 100644 --- a/packages/web/src/features/search/parser.ts +++ b/packages/web/src/features/search/parser.ts @@ -21,6 +21,7 @@ import { QuotedTerm, Tree, VisibilityExpr, + SelectExpr, } from '@sourcebot/query-language'; import { parser as _parser } from '@sourcebot/query-language'; import { PrismaClient } from '@sourcebot/db'; @@ -64,6 +65,30 @@ const findLinguistLanguage = (value: string): string => { return languageKeyLowerCaseMap.get(value.toLowerCase()) ?? value; } +export type SelectMode = 'repo' | null; + +/** + * Extracts the select: modifier from a Lezer tree. + * Returns 'repo' or null. + */ +const extractSelectMode = (tree: Tree, input: string): SelectMode => { + let selectMode: SelectMode = null; + const cursor = tree.cursor(); + do { + if (cursor.name === 'SelectExpr') { + const text = input.substring(cursor.from, cursor.to); + const colonIndex = text.indexOf(':'); + if (colonIndex !== -1) { + const value = text.substring(colonIndex + 1).trim(); + if (value === 'repo') { + selectMode = value; + } + } + } + } while (cursor.next()); + return selectMode; +} + /** * Given a query string, parses it into the query intermediate representation. 
*/ @@ -78,14 +103,17 @@ export const parseQuerySyntaxIntoIR = async ({ isRegexEnabled?: boolean; }, prisma: PrismaClient, -}): Promise => { +}): Promise<{ ir: QueryIR, selectMode: SelectMode }> => { try { // First parse the query into a Lezer tree. const tree = parser.parse(query); + // Extract the select mode before transforming (select: is a projection modifier, not a zoekt filter) + const selectMode = extractSelectMode(tree, query); + // Then transform the tree into the intermediate representation. - return transformTreeToIR({ + const ir = await transformTreeToIR({ tree, input: query, isCaseSensitivityEnabled: options.isCaseSensitivityEnabled ?? false, @@ -110,6 +138,8 @@ export const parseQuerySyntaxIntoIR = async ({ return context.repos.map((repo) => repo.name); }, }); + + return { ir, selectMode }; } catch (error) { if (error instanceof SyntaxError) { throw new ServiceErrorException({ @@ -415,6 +445,11 @@ const transformTreeToIR = async ({ query: "repo_set" }; } + case SelectExpr: { + // select: is a projection modifier — no-op match-all, optimized away by zoekt + return { const: true, query: "const" }; + } + default: throw new Error(`Unknown prefix type: ${prefixNode.type.name} (id: ${prefixTypeId})`); } diff --git a/packages/web/src/features/search/types.ts b/packages/web/src/features/search/types.ts index e053c8e20..69c030be6 100644 --- a/packages/web/src/features/search/types.ts +++ b/packages/web/src/features/search/types.ts @@ -30,6 +30,14 @@ export const repositoryInfoSchema = z.object({ }); export type RepositoryInfo = z.infer; +export const repoResultSchema = z.object({ + repositoryId: z.number(), + repository: z.string(), + repositoryInfo: repositoryInfoSchema.optional(), + matchCount: z.number(), +}); +export type RepoResult = z.infer; + // @note: Many of these fields are defined in zoekt/api.go. export const searchStatsSchema = z.object({ actualMatchCount: z.number(), // The actual number of matches returned by the search. 
This will always be less than or equal to `totalMatchCount`. @@ -104,6 +112,7 @@ export const searchResponseSchema = z.object({ files: z.array(searchFileSchema), repositoryInfo: z.array(repositoryInfoSchema), isSearchExhaustive: z.boolean(), + repoResults: z.array(repoResultSchema).optional(), }); export type SearchResponse = z.infer; @@ -115,6 +124,7 @@ export const streamedSearchChunkResponseSchema = z.object({ stats: searchStatsSchema, files: z.array(searchFileSchema), repositoryInfo: z.array(repositoryInfoSchema), + repoResults: z.array(repoResultSchema).optional(), }); export type StreamedSearchChunkResponse = z.infer; @@ -125,6 +135,7 @@ export const streamedSearchFinalResponseSchema = z.object({ type: z.literal('final'), accumulatedStats: searchStatsSchema, isSearchExhaustive: z.boolean(), + repoResults: z.array(repoResultSchema).optional(), }); export type StreamedSearchFinalResponse = z.infer; From be63399ee921c8e608395e403c784cc940282674 Mon Sep 17 00:00:00 2001 From: thomasleveil Date: Mon, 16 Mar 2026 01:26:19 +0100 Subject: [PATCH 03/15] feat(search): implement select:repo projection in the search pipeline When selectMode === 'repo', results are post-processed rather than filtered at the zoekt level: searchApi.ts - applySelectRepo() deduplicates file results by repository and returns RepoResult[] sorted by matchCount desc - search() and streamSearch() detect selectMode and route accordingly zoektSearcher.ts - zoektStreamSearch() accepts selectMode and accumulates a _repoMap across SSE chunks - Intermediate chunks emit files=[], repoResults=[...partial] - Final message emits the complete sorted repoResults list useStreamedSearch.ts - State gains repoResults: RepoResult[] - CacheEntry includes repoResults for instant replay on back/forward --- .../app/[domain]/search/useStreamedSearch.ts | 13 +++- packages/web/src/features/search/searchApi.ts | 75 +++++++++++++------ .../web/src/features/search/zoektSearcher.ts | 26 ++++++- 3 files changed, 88 
insertions(+), 26 deletions(-) diff --git a/packages/web/src/app/[domain]/search/useStreamedSearch.ts b/packages/web/src/app/[domain]/search/useStreamedSearch.ts index 1d7c0999f..8f699e635 100644 --- a/packages/web/src/app/[domain]/search/useStreamedSearch.ts +++ b/packages/web/src/app/[domain]/search/useStreamedSearch.ts @@ -1,6 +1,6 @@ 'use client'; -import { RepositoryInfo, SearchRequest, SearchResultFile, SearchStats, StreamedSearchResponse } from '@/features/search'; +import { RepositoryInfo, RepoResult, SearchRequest, SearchResultFile, SearchStats, StreamedSearchResponse } from '@/features/search'; import { ServiceErrorException } from '@/lib/serviceError'; import { isServiceError } from '@/lib/utils'; import * as Sentry from '@sentry/nextjs'; @@ -9,6 +9,7 @@ import { useCallback, useEffect, useRef, useState } from 'react'; interface CacheEntry { files: SearchResultFile[]; repoInfo: Record; + repoResults: RepoResult[]; numMatches: number; timeToSearchCompletionMs: number; timeToFirstSearchResultMs: number; @@ -41,6 +42,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex error: Error | null, files: SearchResultFile[], repoInfo: Record, + repoResults: RepoResult[], timeToSearchCompletionMs: number, timeToFirstSearchResultMs: number, numMatches: number, @@ -51,6 +53,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex error: null, files: [], repoInfo: {}, + repoResults: [], timeToSearchCompletionMs: 0, timeToFirstSearchResultMs: 0, numMatches: 0, @@ -98,6 +101,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex error: null, files: cachedEntry.files, repoInfo: cachedEntry.repoInfo, + repoResults: cachedEntry.repoResults ?? 
[], timeToSearchCompletionMs: cachedEntry.timeToSearchCompletionMs, timeToFirstSearchResultMs: cachedEntry.timeToFirstSearchResultMs, numMatches: cachedEntry.numMatches, @@ -111,6 +115,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex error: null, files: [], repoInfo: {}, + repoResults: [], timeToSearchCompletionMs: 0, timeToFirstSearchResultMs: 0, numMatches: 0, @@ -200,6 +205,10 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex ...prev.files, ...response.files ], + repoResults: response.repoResults ? [ + ...prev.repoResults, + ...response.repoResults + ] : prev.repoResults, repoInfo: { ...prev.repoInfo, ...response.repositoryInfo.reduce((acc, repo) => { @@ -218,6 +227,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex ...prev, isExhaustive: response.isSearchExhaustive, stats: response.accumulatedStats, + ...(response.repoResults ? { repoResults: response.repoResults } : {}), ...(isFirstMessage ? 
{ timeToFirstSearchResultMs: performance.now() - startTime, } : {}), @@ -237,6 +247,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex searchCache.set(cacheKey, { files: prev.files, repoInfo: prev.repoInfo, + repoResults: prev.repoResults, isExhaustive: prev.isExhaustive, numMatches: prev.numMatches, timeToFirstSearchResultMs: prev.timeToFirstSearchResultMs, diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts index b21f05c07..5830aed8f 100644 --- a/packages/web/src/features/search/searchApi.ts +++ b/packages/web/src/features/search/searchApi.ts @@ -6,8 +6,8 @@ import { PrismaClient, UserWithAccounts } from "@sourcebot/db"; import { env, hasEntitlement } from "@sourcebot/shared"; import { headers } from "next/headers"; import { QueryIR } from './ir'; -import { parseQuerySyntaxIntoIR } from './parser'; -import { SearchOptions } from "./types"; +import { parseQuerySyntaxIntoIR, SelectMode } from './parser'; +import { SearchOptions, SearchResponse, RepoResult } from "./types"; import { createZoektSearchRequest, zoektSearch, zoektStreamSearch } from './zoektSearcher'; @@ -21,7 +21,6 @@ type QueryStringSearchRequest = { type QueryIRSearchRequest = { queryType: 'ir'; query: QueryIR; - // Omit options that are specific to query syntax parsing. options: Omit; source?: string; } @@ -43,12 +42,16 @@ export const search = (request: SearchRequest) => sew(() => const repoSearchScope = await getAccessibleRepoNamesForUser({ user, prisma }); - // If needed, parse the query syntax into the query intermediate representation. - const query = request.queryType === 'string' ? await parseQuerySyntaxIntoIR({ - query: request.query, - options: request.options, - prisma, - }) : request.query; + let selectMode: SelectMode = null; + const query = request.queryType === 'string' ? 
await (async () => { + const { ir, selectMode: mode } = await parseQuerySyntaxIntoIR({ + query: request.query, + options: request.options, + prisma, + }); + selectMode = mode; + return ir; + })() : request.query; const zoektSearchRequest = await createZoektSearchRequest({ query, @@ -56,7 +59,11 @@ export const search = (request: SearchRequest) => sew(() => repoSearchScope, }); - return zoektSearch(zoektSearchRequest, prisma); + const result = await zoektSearch(zoektSearchRequest, prisma); + if (selectMode === 'repo') { + return applySelectRepo(result); + } + return result; })); export const streamSearch = (request: SearchRequest) => sew(() => @@ -74,12 +81,16 @@ export const streamSearch = (request: SearchRequest) => sew(() => const repoSearchScope = await getAccessibleRepoNamesForUser({ user, prisma }); - // If needed, parse the query syntax into the query intermediate representation. - const query = request.queryType === 'string' ? await parseQuerySyntaxIntoIR({ - query: request.query, - options: request.options, - prisma, - }) : request.query; + let selectMode: SelectMode = null; + const query = request.queryType === 'string' ? await (async () => { + const { ir, selectMode: mode } = await parseQuerySyntaxIntoIR({ + query: request.query, + options: request.options, + prisma, + }); + selectMode = mode; + return ir; + })() : request.query; const zoektSearchRequest = await createZoektSearchRequest({ query, @@ -87,13 +98,9 @@ export const streamSearch = (request: SearchRequest) => sew(() => repoSearchScope, }); - return zoektStreamSearch(zoektSearchRequest, prisma); + return zoektStreamSearch(zoektSearchRequest, prisma, selectMode); })); -/** - * Returns a list of repository names that the user has access to. - * If permission syncing is disabled, returns undefined. 
- */ const getAccessibleRepoNamesForUser = async ({ user, prisma }: { user?: UserWithAccounts, prisma: PrismaClient }) => { if ( env.PERMISSION_SYNC_ENABLED !== 'true' || @@ -110,3 +117,29 @@ const getAccessibleRepoNamesForUser = async ({ user, prisma }: { user?: UserWith }); return accessibleRepos.map(repo => repo.name); } + +const applySelectRepo = (result: SearchResponse): SearchResponse => { + const repoMap = new Map(); + + for (const file of result.files) { + const repoId = file.repositoryId; + if (!repoMap.has(repoId)) { + const repoInfo = result.repositoryInfo.find(r => r.id === repoId); + repoMap.set(repoId, { + repositoryId: repoId, + repository: file.repository, + repositoryInfo: repoInfo, + matchCount: file.chunks.reduce((acc, chunk) => acc + chunk.matchRanges.length, 0), + }); + } else { + const existing = repoMap.get(repoId)!; + existing.matchCount += file.chunks.reduce((acc, chunk) => acc + chunk.matchRanges.length, 0); + } + } + + return { + ...result, + repoResults: Array.from(repoMap.values()).sort((a, b) => b.matchCount - a.matchCount), + files: [], + }; +}; diff --git a/packages/web/src/features/search/zoektSearcher.ts b/packages/web/src/features/search/zoektSearcher.ts index 7203521ba..94c53323c 100644 --- a/packages/web/src/features/search/zoektSearcher.ts +++ b/packages/web/src/features/search/zoektSearcher.ts @@ -16,7 +16,7 @@ import { PrismaClient, Repo } from "@sourcebot/db"; import { createLogger, env } from "@sourcebot/shared"; import path from 'path'; import { isBranchQuery, QueryIR, someInQueryIR } from './ir'; -import { RepositoryInfo, SearchResponse, SearchResultFile, SearchStats, SourceRange, StreamedSearchErrorResponse, StreamedSearchResponse } from "./types"; +import { RepositoryInfo, RepoResult, SearchResponse, SearchResultFile, SearchStats, SourceRange, StreamedSearchErrorResponse, StreamedSearchResponse } from "./types"; import { captureEvent } from "@/lib/posthog"; import { getBrowsePath } from 
"@/app/[domain]/browse/hooks/utils"; import { SINGLE_TENANT_ORG_DOMAIN } from "@/lib/constants"; @@ -148,7 +148,7 @@ export const zoektSearch = async (searchRequest: ZoektGrpcSearchRequest, prisma: }); } -export const zoektStreamSearch = async (searchRequest: ZoektGrpcSearchRequest, prisma: PrismaClient): Promise => { +export const zoektStreamSearch = async (searchRequest: ZoektGrpcSearchRequest, prisma: PrismaClient, selectMode?: string | null): Promise => { const client = createGrpcClient(); let grpcStream: ReturnType | null = null; let isStreamActive = true; @@ -177,6 +177,8 @@ export const zoektStreamSearch = async (searchRequest: ZoektGrpcSearchRequest, p flushReason: ZoektGrpcFlushReason.FLUSH_REASON_UNKNOWN_UNSPECIFIED, }; + const _accumulatedRepoMap = new Map(); + return new ReadableStream({ async start(controller) { const tryCloseController = () => { @@ -185,6 +187,9 @@ export const zoektStreamSearch = async (searchRequest: ZoektGrpcSearchRequest, p type: 'final', accumulatedStats, isSearchExhaustive: accumulatedStats.totalMatchCount <= accumulatedStats.actualMatchCount, + ...(selectMode === 'repo' ? 
{ + repoResults: Array.from(_accumulatedRepoMap.values()).sort((a, b) => b.matchCount - a.matchCount) + } : {}), } controller.enqueue(encodeSSEREsponseChunk(finalResponse)); @@ -234,11 +239,24 @@ export const zoektStreamSearch = async (searchRequest: ZoektGrpcSearchRequest, p accumulatedStats = accumulateStats(accumulatedStats, stats); + // Accumulate repo map for select:repo mode + if (selectMode === 'repo') { + for (const file of files) { + const repoId = file.repositoryId; + if (!_accumulatedRepoMap.has(repoId)) { + const ri = repositoryInfo.find(r => r.id === repoId); + _accumulatedRepoMap.set(repoId, { repositoryId: repoId, repository: file.repository, repositoryInfo: ri, matchCount: file.chunks.reduce((acc, chunk) => acc + chunk.matchRanges.length, 0) }); + } else { + _accumulatedRepoMap.get(repoId)!.matchCount += file.chunks.reduce((acc, chunk) => acc + chunk.matchRanges.length, 0); + } + } + } + const response: StreamedSearchResponse = { type: 'chunk', - files, + files: selectMode === 'repo' ? [] : files, repositoryInfo, - stats + stats, } controller.enqueue(encodeSSEREsponseChunk(response)); From 655eeb708b39b6d7415b2505d313119557656429 Mon Sep 17 00:00:00 2001 From: thomasleveil Date: Mon, 16 Mar 2026 01:26:29 +0100 Subject: [PATCH 04/15] feat(ui): render repository list when select:repo is active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds RepoResultsPanel — a new component that displays the deduplicated list of matching repositories with their match count. Each row is clickable and navigates to a scoped `repo:` search. searchResultsPage now detects select:repo in the active query and swaps the file results panel for RepoResultsPanel. The two modes are mutually exclusive — no layout change is needed for the file panel. 
--- .../search/components/repoResultsPanel.tsx | 68 +++++++++++++++++++ .../search/components/searchResultsPage.tsx | 37 +++++++++- 2 files changed, 102 insertions(+), 3 deletions(-) create mode 100644 packages/web/src/app/[domain]/search/components/repoResultsPanel.tsx diff --git a/packages/web/src/app/[domain]/search/components/repoResultsPanel.tsx b/packages/web/src/app/[domain]/search/components/repoResultsPanel.tsx new file mode 100644 index 000000000..05d144e94 --- /dev/null +++ b/packages/web/src/app/[domain]/search/components/repoResultsPanel.tsx @@ -0,0 +1,68 @@ +'use client'; + +import { RepoResult, RepositoryInfo } from "@/features/search"; +import { useDomain } from "@/hooks/useDomain"; +import { createPathWithQueryParams } from "@/lib/utils"; +import { useRouter } from "next/navigation"; +import { SearchQueryParams } from "@/lib/types"; + +interface RepoResultsPanelProps { + repoResults: RepoResult[]; + searchQuery: string; +} + +export const RepoResultsPanel = ({ repoResults, searchQuery }: RepoResultsPanelProps) => { + const domain = useDomain(); + const router = useRouter(); + + const navigateToRepo = (repoName: string) => { + // Replace select:repo with repo:xxx, preserving all other filters + const newQuery = searchQuery + .replace(/(?:^|\s)select:repo(?:\s|$)/g, ' ') + .trim() + .concat(` repo:${repoName}`) + .trim(); + const path = createPathWithQueryParams( + `/${domain}/search`, + [SearchQueryParams.query, newQuery], + ); + router.push(path); + }; + + if (repoResults.length === 0) { + return ( +
+ No repositories found +
+ ); + } + + return ( +
+
+ {repoResults.length} {repoResults.length === 1 ? "repository" : "repositories"} matched +
+ {repoResults.map((repo) => ( + + ))} +
+ ); +}; diff --git a/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx b/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx index a8a7c9139..9734b2487 100644 --- a/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx +++ b/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx @@ -11,7 +11,7 @@ import { } from "@/components/ui/resizable"; import { Separator } from "@/components/ui/separator"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; -import { RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search"; +import { RepoResult, RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search"; import useCaptureEvent from "@/hooks/useCaptureEvent"; import { useDomain } from "@/hooks/useDomain"; import { useNonEmptyQueryParam } from "@/hooks/useNonEmptyQueryParam"; @@ -33,6 +33,7 @@ import { CodePreviewPanel } from "./codePreviewPanel"; import { FilterPanel } from "./filterPanel"; import { useFilteredMatches } from "./filterPanel/useFilterMatches"; import { SearchResultsPanel, SearchResultsPanelHandle } from "./searchResultsPanel"; +import { RepoResultsPanel } from "./repoResultsPanel"; import { ServiceErrorException } from "@/lib/serviceError"; import { Session } from "next-auth"; @@ -67,6 +68,7 @@ export const SearchResultsPage = ({ error, files, repoInfo, + repoResults, timeToSearchCompletionMs, timeToFirstSearchResultMs, isStreaming, @@ -82,6 +84,9 @@ export const SearchResultsPage = ({ isCaseSensitivityEnabled, }); + // Detect if the query uses select:repo projection + const isSelectRepoMode = /(?:^|\s)select:repo(?:\s|$)/.test(searchQuery); + useEffect(() => { if (error) { toast({ @@ -208,6 +213,9 @@ export const SearchResultsPage = ({ searchStats={stats} isMoreResultsButtonVisible={!isExhaustive} isBranchFilteringEnabled={isBranchFilteringEnabled} + isSelectRepoMode={isSelectRepoMode} + repoResults={repoResults} + 
searchQuery={searchQuery} /> )} @@ -224,6 +232,9 @@ interface PanelGroupProps { searchDurationMs: number; numMatches: number; searchStats?: SearchStats; + isSelectRepoMode: boolean; + repoResults: RepoResult[] | undefined; + searchQuery: string; } const PanelGroup = ({ @@ -236,6 +247,9 @@ const PanelGroup = ({ searchDurationMs: _searchDurationMs, numMatches, searchStats, + isSelectRepoMode, + repoResults, + searchQuery, }: PanelGroupProps) => { const [previewedFile, setPreviewedFile] = useState(undefined); const filteredFileMatches = useFilteredMatches(fileMatches); @@ -354,7 +368,11 @@ const PanelGroup = ({ { - fileMatches.length > 0 ? ( + isSelectRepoMode ? ( + repoResults && repoResults.length > 0 ? ( +

{`[${searchDurationMs} ms] Found ${repoResults.length} ${repoResults.length === 1 ? 'repository' : 'repositories'}`}

+ ) : null + ) : fileMatches.length > 0 ? (

{`[${searchDurationMs} ms] Found ${numMatches} matches in ${fileMatches.length} ${fileMatches.length > 1 ? 'files' : 'file'}`}

) : (

No results

@@ -372,7 +390,20 @@ const PanelGroup = ({ )}
- {filteredFileMatches.length > 0 ? ( + {isSelectRepoMode ? ( + (repoResults && repoResults.length > 0) ? ( + + ) : isStreaming ? ( +
+ +

Searching...

+
+ ) : ( +
+

No results found

+
+ ) + ) : filteredFileMatches.length > 0 ? ( Date: Mon, 16 Mar 2026 01:26:42 +0100 Subject: [PATCH 05/15] feat(ui): add select: autocomplete and syntax highlighting in search bar - constants.ts: adds select:repo to the suggestion completions - searchSuggestionsBox.tsx: surfaces select: suggestions in the dropdown - useSuggestionModeMappings.ts: maps 'select' to its completions - zoektLanguageExtension.ts: highlights 'select:' as a keyword prefix Typing 'select:' in the search bar now shows 'repo' as its completion (the only select mode currently offered), consistent with how lang:, repo:, and file: work. --- .../app/[domain]/components/searchBar/constants.ts | 8 ++++++++ .../components/searchBar/searchSuggestionsBox.tsx | 12 +++++++++++- .../components/searchBar/useRefineModeSuggestions.ts | 2 +- .../searchBar/useSuggestionModeMappings.ts | 6 ++++++ .../components/searchBar/zoektLanguageExtension.ts | 2 +- 5 files changed, 27 insertions(+), 3 deletions(-) diff --git a/packages/web/src/app/[domain]/components/searchBar/constants.ts b/packages/web/src/app/[domain]/components/searchBar/constants.ts index ea93cee87..3dd7b73f3 100644 --- a/packages/web/src/app/[domain]/components/searchBar/constants.ts +++ b/packages/web/src/app/[domain]/components/searchBar/constants.ts @@ -19,6 +19,7 @@ export enum SearchPrefix { fork = "fork:", visibility = "visibility:", context = "context:", + select = "select:", } export const visibilityModeSuggestions: Suggestion[] = [ @@ -66,3 +67,10 @@ export const archivedModeSuggestions: Suggestion[] = [ } ]; + +export const selectModeSuggestions: Suggestion[] = [ + { + value: "repo", + description: "Return the list of repositories containing matches instead of individual file results.", + }, +]; diff --git a/packages/web/src/app/[domain]/components/searchBar/searchSuggestionsBox.tsx b/packages/web/src/app/[domain]/components/searchBar/searchSuggestionsBox.tsx index c5bebb640..eac48554c 100644 --- a/packages/web/src/app/[domain]/components/searchBar/searchSuggestionsBox.tsx 
+++ b/packages/web/src/app/[domain]/components/searchBar/searchSuggestionsBox.tsx @@ -8,6 +8,7 @@ import { forwardRef, Ref, useEffect, useMemo, useState } from "react"; import { archivedModeSuggestions, forkModeSuggestions, + selectModeSuggestions, visibilityModeSuggestions, } from "./constants"; import { IconType } from "react-icons/lib"; @@ -39,7 +40,8 @@ export type SuggestionMode = "content" | "repo" | "searchHistory" | - "context"; + "context" | + "select"; interface SearchSuggestionsBoxProps { className?: string; @@ -206,6 +208,12 @@ const SearchSuggestionsBox = forwardRef(({ descriptionPlacement: "left", DefaultIcon: VscFilter, } + case "select": + return { + list: selectModeSuggestions, + onSuggestionClicked: createOnSuggestionClickedHandler({ trailingSpace: true }), + descriptionPlacement: "left", + } case "none": case "revision": case "content": @@ -299,6 +307,8 @@ const SearchSuggestionsBox = forwardRef(({ return "Search history" case "context": return "Search contexts" + case "select": + return "Select mode" default: return ""; } diff --git a/packages/web/src/app/[domain]/components/searchBar/useRefineModeSuggestions.ts b/packages/web/src/app/[domain]/components/searchBar/useRefineModeSuggestions.ts index 6e51d982a..e7c60220f 100644 --- a/packages/web/src/app/[domain]/components/searchBar/useRefineModeSuggestions.ts +++ b/packages/web/src/app/[domain]/components/searchBar/useRefineModeSuggestions.ts @@ -27,7 +27,7 @@ export const useRefineModeSuggestions = () => { ] : []), { value: SearchPrefix.select, - description: "Project results — return matching repositories instead of individual files.", + description: "Project results — return repositories instead of files.", spotlight: true, }, { diff --git a/packages/web/src/app/[domain]/components/searchBar/useSuggestionModeMappings.ts b/packages/web/src/app/[domain]/components/searchBar/useSuggestionModeMappings.ts index 64e4acfb9..e0c2f665e 100644 --- 
a/packages/web/src/app/[domain]/components/searchBar/useSuggestionModeMappings.ts +++ b/packages/web/src/app/[domain]/components/searchBar/useSuggestionModeMappings.ts @@ -91,6 +91,12 @@ export const useSuggestionModeMappings = () => { ] } satisfies SuggestionModeMapping, ] : []), + { + suggestionMode: "select", + prefixes: [ + SearchPrefix.select, + ], + }, ] }, [isSearchContextsEnabled]); diff --git a/packages/web/src/app/[domain]/components/searchBar/zoektLanguageExtension.ts b/packages/web/src/app/[domain]/components/searchBar/zoektLanguageExtension.ts index 6d86a710d..d140bb1a0 100644 --- a/packages/web/src/app/[domain]/components/searchBar/zoektLanguageExtension.ts +++ b/packages/web/src/app/[domain]/components/searchBar/zoektLanguageExtension.ts @@ -47,7 +47,7 @@ export const zoekt = () => { // Check for prefixes first // If these match, we return 'keyword' - if (stream.match(/(archived:|rev:|content:|f:|file:|fork:|visibility:|r:|repo:|regex:|lang:|sym:|t:|type:|context:)/)) { + if (stream.match(/(archived:|rev:|content:|f:|file:|fork:|visibility:|r:|repo:|regex:|lang:|sym:|t:|type:|context:|select:)/)) { return t.keyword.toString(); } From ab58387c06178e88af9c0306416bd50c043849ec Mon Sep 17 00:00:00 2001 From: thomasleveil Date: Mon, 16 Mar 2026 01:35:24 +0100 Subject: [PATCH 06/15] feat(mcp): add search_repos tool and support repoResults in schemas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit schemas.ts / types.ts - Adds repoResultSchema and RepoResult, mirroring the web package - Adds repoResults?: RepoResult[] to searchResponseSchema so the MCP client can parse select:repo API responses index.ts — new tool: search_repos - Accepts query, filterByLanguages, caseSensitive, ref, maxResults - Appends select:repo to the query before calling the search API - Returns a formatted list: repo name, match count, optional URL - Designed to answer 'which repos use X?' 
questions directly index.ts — shared helpers (reusable by future tools e.g. search_commits) - searchFilterParamsSchema: zod schema spread into each tool's params - buildQueryFilters(): pure function that appends lang:, repo:, file:, rev: filter tokens to a query string index.ts — exports `server` for use in tests --- packages/mcp/package.json | 3 +- packages/mcp/src/index.ts | 190 +++++++++++++++++++++++++++--------- packages/mcp/src/schemas.ts | 8 ++ packages/mcp/src/types.ts | 2 + 4 files changed, 158 insertions(+), 45 deletions(-) diff --git a/packages/mcp/package.json b/packages/mcp/package.json index bad139b54..0ed7bd84d 100644 --- a/packages/mcp/package.json +++ b/packages/mcp/package.json @@ -7,7 +7,8 @@ "scripts": { "build": "tsc", "dev": "node ./dist/index.js", - "build:watch": "tsc-watch --preserveWatchOutput" + "build:watch": "tsc-watch --preserveWatchOutput", + "test": "node --import tsx/esm --test src/__tests__/*.test.ts" }, "devDependencies": { "@types/express": "^5.0.1", diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index aa8b27513..3b05eef23 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -15,12 +15,79 @@ import { buildTreeNodeIndex, joinTreePath, normalizeTreePath, sortTreeEntries } const dedent = _dedent.withOptions({ alignValues: true }); // Create MCP server -const server = new McpServer({ +export const server = new McpServer({ name: 'sourcebot-mcp-server', version: '0.1.0', }); +// --------------------------------------------------------------------------- +// Shared query-building helpers +// --------------------------------------------------------------------------- + +/** + * Common filter parameters accepted by every search tool. + * Add new filter params here once and they become available to all tools. 
+ */ +const searchFilterParamsSchema = { + filterByLanguages: z + .array(z.string()) + .describe(`Scope the search to the provided languages.`) + .optional(), + filterByRepos: z + .array(z.string()) + .describe(`Scope the search to the provided repositories.`) + .optional(), + filterByFilepaths: z + .array(z.string()) + .describe(`Scope the search to the provided file paths.`) + .optional(), + ref: z + .string() + .describe(`Commit SHA, branch or tag name to search on. Defaults to the default branch.`) + .optional(), + caseSensitive: z + .boolean() + .describe(`Whether the search should be case sensitive (default: false).`) + .optional(), + useRegex: z + .boolean() + .describe(`Whether to use regular expression matching. When false, substring matching is used. (default: false)`) + .optional(), +}; + +/** + * Appends zoekt filter tokens (lang:, repo:, file:, rev:) to a base query. + */ +const buildQueryFilters = ({ + query, + filterByLanguages = [], + filterByRepos = [], + filterByFilepaths = [], + ref, +}: { + query: string; + filterByLanguages?: string[]; + filterByRepos?: string[]; + filterByFilepaths?: string[]; + ref?: string; +}): string => { + let q = query; + if (filterByRepos.length > 0) { + q += ` (repo:${filterByRepos.map(id => escapeStringRegexp(id)).join(' or repo:')})`; + } + if (filterByLanguages.length > 0) { + q += ` (lang:${filterByLanguages.join(' or lang:')})`; + } + if (filterByFilepaths.length > 0) { + q += ` (file:${filterByFilepaths.map(fp => escapeStringRegexp(fp)).join(' or file:')})`; + } + if (ref) { + q += ` (rev:${ref})`; + } + return q; +}; + server.tool( "search_code", dedent` @@ -36,34 +103,13 @@ server.tool( const escaped = val.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); return `"${escaped}"`; }), - useRegex: z - .boolean() - .describe(`Whether to use regular expression matching to match the search query against code contents. When false, substring matching is used. 
(default: false)`) - .optional(), - filterByRepos: z - .array(z.string()) - .describe(`Scope the search to the provided repositories.`) - .optional(), - filterByLanguages: z - .array(z.string()) - .describe(`Scope the search to the provided languages.`) - .optional(), - filterByFilepaths: z - .array(z.string()) - .describe(`Scope the search to the provided filepaths.`) - .optional(), - caseSensitive: z - .boolean() - .describe(`Whether the search should be case sensitive (default: false).`) - .optional(), + + ...searchFilterParamsSchema, includeCodeSnippets: z .boolean() .describe(`Whether to include the code snippets in the response. If false, only the file's URL, repository, and language will be returned. (default: false)`) .optional(), - ref: z - .string() - .describe(`Commit SHA, branch or tag name to search on. If not provided, defaults to the default branch (usually 'main' or 'master').`) - .optional(), + maxTokens: numberSchema .describe(`The maximum number of tokens to return (default: ${env.DEFAULT_MINIMUM_TOKENS}). Higher values provide more context but consume more tokens. Values less than ${env.DEFAULT_MINIMUM_TOKENS} will be ignored.`) .transform((val) => (val < env.DEFAULT_MINIMUM_TOKENS ? 
env.DEFAULT_MINIMUM_TOKENS : val)) @@ -80,21 +126,7 @@ server.tool( ref, useRegex = false, }) => { - if (repos.length > 0) { - query += ` (repo:${repos.map(id => escapeStringRegexp(id)).join(' or repo:')})`; - } - - if (languages.length > 0) { - query += ` (lang:${languages.join(' or lang:')})`; - } - - if (filepaths.length > 0) { - query += ` (file:${filepaths.map(filepath => escapeStringRegexp(filepath)).join(' or file:')})`; - } - - if (ref) { - query += ` ( rev:${ref} )`; - } + query = buildQueryFilters({ query, filterByRepos: repos, filterByLanguages: languages, filterByFilepaths: filepaths, ref }); const response = await search({ query, @@ -445,12 +477,82 @@ server.tool( } ); + +server.tool( + "search_repos", + `Searches code and returns the list of matching repositories (deduplicated), sorted by number of matches. Useful for answering "which repos use X?" questions. Equivalent to appending select:repo to a Sourcebot query.`, + { + query: z + .string() + .describe(`The search pattern to match against code contents. Supports plain text or regex if useRegex is true.`), + + ...searchFilterParamsSchema, + maxResults: z + .number() + .int() + .positive() + .describe(`Maximum number of repositories to return (default: 50).`) + .optional(), + }, + async ({ + query, + filterByLanguages: languages = [], + caseSensitive = false, + ref, + useRegex = false, + maxResults = 50, + }) => { + let fullQuery = buildQueryFilters({ query, filterByLanguages: languages, ref }); + if (!fullQuery.includes('select:repo')) { + fullQuery += ' select:repo'; + } + + const response = await search({ + query: fullQuery, + matches: env.DEFAULT_MATCHES, + contextLines: 0, + isRegexEnabled: useRegex, + isCaseSensitivityEnabled: caseSensitive, + }); + + const repos = response.repoResults ?? 
[]; + + if (repos.length === 0) { + return { + content: [{ + type: "text", + text: `No repositories found matching: ${query}`, + }], + }; + } + + const limited = repos.slice(0, maxResults); + const lines = limited.map(r => + `repo: ${r.repository} matches: ${r.matchCount}${r.repositoryInfo?.webUrl ? ` url: ${r.repositoryInfo.webUrl}` : ''}` + ); + + const text = [ + `Found ${repos.length} repositor${repos.length === 1 ? 'y' : 'ies'} matching "${query}"${limited.length < repos.length ? ` (showing top ${maxResults})` : ''}:`, + '', + ...lines, + ].join('\n'); + + return { + content: [{ type: "text", text }], + }; + } +); + const runServer = async () => { const transport = new StdioServerTransport(); await server.connect(transport); } -runServer().catch((error) => { - console.error('Failed to start MCP server:', error); - process.exit(1); -}); +// Only auto-start when run directly (not when imported in tests) +const isMain = process.argv[1] && import.meta.url.endsWith(process.argv[1].replace(/\.ts$/, '.js').split('/').pop() ?? 
''); +if (isMain) { + runServer().catch((error) => { + console.error('Failed to start MCP server:', error); + process.exit(1); + }); +} diff --git a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index a72fbf116..98f1eb7fc 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -114,8 +114,16 @@ export const searchStatsSchema = z.object({ flushReason: z.string(), }); +export const repoResultSchema = z.object({ + repositoryId: z.number(), + repository: z.string(), + repositoryInfo: repositoryInfoSchema.optional(), + matchCount: z.number(), +}); + export const searchResponseSchema = z.object({ stats: searchStatsSchema, + repoResults: z.array(repoResultSchema).optional(), files: z.array(z.object({ fileName: z.object({ // The name of the file diff --git a/packages/mcp/src/types.ts b/packages/mcp/src/types.ts index 63c050856..0352b30dd 100644 --- a/packages/mcp/src/types.ts +++ b/packages/mcp/src/types.ts @@ -3,6 +3,7 @@ import { fileSourceResponseSchema, listReposQueryParamsSchema, + repoResultSchema, locationSchema, searchRequestSchema, searchResponseSchema, @@ -24,6 +25,7 @@ import { } from "./schemas.js"; import { z } from "zod"; +export type RepoResult = z.infer; export type SearchRequest = z.infer; export type SearchResponse = z.infer; export type SearchResultRange = z.infer; From 31640f3215565fff672c1ced4fb8f2d86b0e3fee Mon Sep 17 00:00:00 2001 From: thomasleveil Date: Mon, 16 Mar 2026 01:27:09 +0100 Subject: [PATCH 07/15] =?UTF-8?q?test(mcp):=20add=20test=20suite=20for=20s?= =?UTF-8?q?elect:repo=20=E2=80=94=20schemas,=20transform,=20and=20tool?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 15 tests across 4 suites using node:test + tsx (no extra test framework): repoResultSchema (3) - valid parse, with optional repositoryInfo, missing required field searchResponseSchema with repoResults (3) - backward compat (no repoResults), with repoResults, invalid entry search_code hasModifiers 
transform (5) - detects select:, lang:, repo:; no false positives on plain text or partial words like selector: search_repos tool end-to-end via InMemoryTransport (4) - returns repo list, empty-results message, lang: filter appended, maxResults respected with total count in output Run: yarn workspace @sourcebot/mcp test --- .../mcp/src/__tests__/select-repo.test.ts | 182 ++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 packages/mcp/src/__tests__/select-repo.test.ts diff --git a/packages/mcp/src/__tests__/select-repo.test.ts b/packages/mcp/src/__tests__/select-repo.test.ts new file mode 100644 index 000000000..33ba941bb --- /dev/null +++ b/packages/mcp/src/__tests__/select-repo.test.ts @@ -0,0 +1,182 @@ +/** + * Tests for the select:repo feature in the MCP server. + * + * Covers: + * 1. repoResultSchema / searchResponseSchema validation + * 2. The hasModifiers transform fix in search_code + * 3. The search_repos tool end-to-end via InMemoryTransport + * + * Run with: + * node --import tsx/esm --test src/__tests__/select-repo.test.ts + */ + +import { describe, it, before, after } from 'node:test'; +import assert from 'node:assert/strict'; +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { InMemoryTransport } from '@modelcontextprotocol/sdk/inMemory.js'; +import { repoResultSchema, searchResponseSchema } from '../schemas.js'; + +// ---- helpers ---------------------------------------------------------------- + +function makeStats() { + return { + actualMatchCount: 0, totalMatchCount: 0, duration: 0, fileCount: 0, + filesSkipped: 0, contentBytesLoaded: 0, indexBytesLoaded: 0, crashes: 0, + shardFilesConsidered: 0, filesConsidered: 0, filesLoaded: 0, + shardsScanned: 0, shardsSkipped: 0, shardsSkippedFilter: 0, + ngramMatches: 0, ngramLookups: 0, wait: 0, + matchTreeConstruction: 0, matchTreeSearch: 0, + regexpsConsidered: 0, flushReason: 'none', + }; +} + +function makeSearchResponse(extra: Record = {}) { + return { 
stats: makeStats(), files: [], repositoryInfo: [], isSearchExhaustive: true, ...extra }; +} + +function mockFetch(payload: unknown) { + globalThis.fetch = async (_input: RequestInfo | URL, _init?: RequestInit) => + new Response(JSON.stringify(payload), { status: 200, headers: { 'Content-Type': 'application/json' } }); +} + +function captureFetch(payload: unknown, onCall: (body: Record) => void) { + globalThis.fetch = async (_input: RequestInfo | URL, init?: RequestInit) => { + onCall(JSON.parse((init?.body as string) ?? '{}')); + return new Response(JSON.stringify(payload), { status: 200, headers: { 'Content-Type': 'application/json' } }); + }; +} + +function getText(result: unknown): string { + return (result as { content: Array<{ type: string; text: string }> }).content + .map((c) => c.text).join('\n'); +} + +// ---- 1. Schema validation --------------------------------------------------- + +describe('repoResultSchema', () => { + it('parses a valid RepoResult', () => { + const r = repoResultSchema.safeParse({ repositoryId: 1, repository: 'github.com/acme/frontend', matchCount: 42 }); + assert.ok(r.success); + assert.equal(r.data.matchCount, 42); + }); + + it('parses a RepoResult with optional repositoryInfo', () => { + const r = repoResultSchema.safeParse({ + repositoryId: 2, repository: 'github.com/acme/backend', matchCount: 7, + repositoryInfo: { id: 2, codeHostType: 'github', name: 'acme/backend', webUrl: 'https://github.com/acme/backend' }, + }); + assert.ok(r.success); + assert.equal(r.data.repositoryInfo?.webUrl, 'https://github.com/acme/backend'); + }); + + it('rejects a RepoResult missing matchCount', () => { + const r = repoResultSchema.safeParse({ repositoryId: 1, repository: 'github.com/acme/x' }); + assert.ok(!r.success, 'should have failed'); + }); +}); + +describe('searchResponseSchema with repoResults', () => { + it('accepts a response without repoResults (backward compat)', () => { + const r = searchResponseSchema.safeParse(makeSearchResponse()); 
+ assert.ok(r.success); + assert.equal(r.data.repoResults, undefined); + }); + + it('accepts a response with repoResults', () => { + const r = searchResponseSchema.safeParse(makeSearchResponse({ + repoResults: [ + { repositoryId: 1, repository: 'github.com/acme/a', matchCount: 10 }, + { repositoryId: 2, repository: 'github.com/acme/b', matchCount: 3 }, + ], + })); + assert.ok(r.success); + assert.equal(r.data.repoResults?.length, 2); + }); + + it('rejects repoResults with a missing required field', () => { + const r = searchResponseSchema.safeParse(makeSearchResponse({ + repoResults: [{ repositoryId: 1, repository: 'github.com/x' }], + })); + assert.ok(!r.success, 'should have failed'); + }); +}); + +// ---- 2. hasModifiers transform logic ---------------------------------------- + +describe('search_code query transform — hasModifiers regex', () => { + const RE = /(?:^|\s)(?:select|repo|lang|file|case|rev|branch|sym|content):/; + + it('detects select:repo modifier', () => assert.ok(RE.test('useState select:repo'))); + it('detects lang: modifier', () => assert.ok(RE.test('function lang:TypeScript'))); + it('detects repo: at start', () => assert.ok(RE.test('repo:acme/frontend useState'))); + it('does not false-positive on plain text', () => { + assert.ok(!RE.test('useState hook')); + assert.ok(!RE.test('async function fetch')); + }); + it('does not match partial words (selector:hover)', () => assert.ok(!RE.test('selector:hover'))); +}); + +// ---- 3. 
search_repos tool (end-to-end) -------------------------------------- + +describe('search_repos tool', () => { + let client: Client; + let savedFetch: typeof globalThis.fetch; + + before(async () => { + savedFetch = globalThis.fetch; + process.env.SOURCEBOT_HOST = 'http://localhost:3000'; + process.env.SOURCEBOT_API_KEY = 'test-key'; + + // Dynamic import so env vars are set first + const { server } = await import('../index.js'); + const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair(); + await server.connect(serverTransport); + client = new Client({ name: 'test-client', version: '0.0.1' }); + await client.connect(clientTransport); + }); + + after(async () => { + await client?.close(); + globalThis.fetch = savedFetch; + }); + + it('returns repo list from API repoResults', async () => { + mockFetch(makeSearchResponse({ + repoResults: [ + { repositoryId: 1, repository: 'github.com/acme/frontend', matchCount: 20 }, + { repositoryId: 2, repository: 'github.com/acme/backend', matchCount: 5 }, + ], + })); + const text = getText(await client.callTool({ name: 'search_repos', arguments: { query: 'useState' } })); + assert.ok(text.includes('github.com/acme/frontend')); + assert.ok(text.includes('github.com/acme/backend')); + assert.ok(text.includes('matches: 20')); + }); + + it('returns no-results message when repoResults is empty', async () => { + mockFetch(makeSearchResponse({ repoResults: [] })); + const text = getText(await client.callTool({ name: 'search_repos', arguments: { query: 'nonExistentSymbol' } })); + assert.ok(text.toLowerCase().includes('no repositories')); + }); + + it('appends select:repo and lang: filters to the query', async () => { + let captured = ''; + captureFetch(makeSearchResponse({ repoResults: [] }), (body) => { captured = body.query as string; }); + await client.callTool({ name: 'search_repos', arguments: { query: 'useState', filterByLanguages: ['TypeScript', 'JavaScript'] } }); + 
assert.ok(captured.includes('lang:TypeScript'), `query: ${captured}`); + assert.ok(captured.includes('lang:JavaScript'), `query: ${captured}`); + assert.ok(captured.includes('select:repo'), `query: ${captured}`); + }); + + it('respects maxResults limit', async () => { + const repos = Array.from({ length: 10 }, (_, i) => ({ + repositoryId: i, repository: `github.com/acme/repo-${i}`, matchCount: 10 - i, + })); + mockFetch(makeSearchResponse({ repoResults: repos })); + const text = getText(await client.callTool({ name: 'search_repos', arguments: { query: 'test', maxResults: 3 } })); + assert.ok(text.includes('10 repositor'), `missing total: ${text}`); + assert.ok(text.includes('top 3'), `missing limit notice: ${text}`); + const lines = text.split('\n').filter((l: string) => l.startsWith('repo:')); + assert.equal(lines.length, 3); + }); +}); From 09ccf9ec8ff8c7b33f6e47caaeef511552c20091 Mon Sep 17 00:00:00 2001 From: thomasleveil Date: Wed, 18 Mar 2026 08:54:06 +0100 Subject: [PATCH 08/15] chore: update CHANGELOG for select:repo PR #1015 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 339c50cc3..633523952 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added AGENTS.md with Cursor Cloud development environment instructions. [#1001](https://github.com/sourcebot-dev/sourcebot/pull/1001) - Added support for configuring SMTP via individual environment variables (SMTP_HOST, SMTP_PORT, SMTP_USERNAME, SMTP_PASSWORD) as an alternative to SMTP_CONNECTION_URL. [#1002](https://github.com/sourcebot-dev/sourcebot/pull/1002) +- Added `select:repo` query modifier that returns a deduplicated list of matching repositories sorted by match count, with a new `RepoResultsPanel` UI and a `search_repos` MCP tool. 
[#1015](https://github.com/sourcebot-dev/sourcebot/pull/1015) ## [4.15.6] - 2026-03-13 From 2ae3968c673ec655122a971c586ee3125a2f638c Mon Sep 17 00:00:00 2001 From: thomasleveil Date: Wed, 18 Mar 2026 09:45:43 +0100 Subject: [PATCH 09/15] refactor(search): extract shared accumulateRepoMap helper The streaming (zoektStreamSearch) and non-streaming (applySelectRepo) paths had identical repo-aggregation loops. Extract accumulateRepoMap() in zoektSearcher.ts and use it in both places so any future change to match counting or deduplication stays in one spot. Addresses CodeRabbit review comment on PR #1015. --- packages/web/src/features/search/searchApi.ts | 20 ++---------- .../web/src/features/search/zoektSearcher.ts | 32 +++++++++++++------ 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts index 5830aed8f..d60a00957 100644 --- a/packages/web/src/features/search/searchApi.ts +++ b/packages/web/src/features/search/searchApi.ts @@ -8,7 +8,7 @@ import { headers } from "next/headers"; import { QueryIR } from './ir'; import { parseQuerySyntaxIntoIR, SelectMode } from './parser'; import { SearchOptions, SearchResponse, RepoResult } from "./types"; -import { createZoektSearchRequest, zoektSearch, zoektStreamSearch } from './zoektSearcher'; +import { accumulateRepoMap, createZoektSearchRequest, zoektSearch, zoektStreamSearch } from './zoektSearcher'; type QueryStringSearchRequest = { @@ -120,23 +120,7 @@ const getAccessibleRepoNamesForUser = async ({ user, prisma }: { user?: UserWith const applySelectRepo = (result: SearchResponse): SearchResponse => { const repoMap = new Map(); - - for (const file of result.files) { - const repoId = file.repositoryId; - if (!repoMap.has(repoId)) { - const repoInfo = result.repositoryInfo.find(r => r.id === repoId); - repoMap.set(repoId, { - repositoryId: repoId, - repository: file.repository, - repositoryInfo: repoInfo, - matchCount: 
file.chunks.reduce((acc, chunk) => acc + chunk.matchRanges.length, 0), - }); - } else { - const existing = repoMap.get(repoId)!; - existing.matchCount += file.chunks.reduce((acc, chunk) => acc + chunk.matchRanges.length, 0); - } - } - + accumulateRepoMap(result.files, result.repositoryInfo, repoMap); return { ...result, repoResults: Array.from(repoMap.values()).sort((a, b) => b.matchCount - a.matchCount), diff --git a/packages/web/src/features/search/zoektSearcher.ts b/packages/web/src/features/search/zoektSearcher.ts index 94c53323c..387434077 100644 --- a/packages/web/src/features/search/zoektSearcher.ts +++ b/packages/web/src/features/search/zoektSearcher.ts @@ -148,6 +148,28 @@ export const zoektSearch = async (searchRequest: ZoektGrpcSearchRequest, prisma: }); } +/** + * Accumulates file results into a per-repository match-count map. + * Shared by the streaming (zoektStreamSearch) and non-streaming (applySelectRepo) paths + * to ensure identical aggregation behaviour. + */ +export const accumulateRepoMap = ( + files: SearchResultFile[], + repositoryInfo: RepositoryInfo[], + repoMap: Map, +): void => { + for (const file of files) { + const repoId = file.repositoryId; + const matchCount = file.chunks.reduce((acc, chunk) => acc + chunk.matchRanges.length, 0); + if (!repoMap.has(repoId)) { + const ri = repositoryInfo.find(r => r.id === repoId); + repoMap.set(repoId, { repositoryId: repoId, repository: file.repository, repositoryInfo: ri, matchCount }); + } else { + repoMap.get(repoId)!.matchCount += matchCount; + } + } +}; + export const zoektStreamSearch = async (searchRequest: ZoektGrpcSearchRequest, prisma: PrismaClient, selectMode?: string | null): Promise => { const client = createGrpcClient(); let grpcStream: ReturnType | null = null; @@ -241,15 +263,7 @@ export const zoektStreamSearch = async (searchRequest: ZoektGrpcSearchRequest, p // Accumulate repo map for select:repo mode if (selectMode === 'repo') { - for (const file of files) { - const repoId = 
file.repositoryId; - if (!_accumulatedRepoMap.has(repoId)) { - const ri = repositoryInfo.find(r => r.id === repoId); - _accumulatedRepoMap.set(repoId, { repositoryId: repoId, repository: file.repository, repositoryInfo: ri, matchCount: file.chunks.reduce((acc, chunk) => acc + chunk.matchRanges.length, 0) }); - } else { - _accumulatedRepoMap.get(repoId)!.matchCount += file.chunks.reduce((acc, chunk) => acc + chunk.matchRanges.length, 0); - } - } + accumulateRepoMap(files, repositoryInfo, _accumulatedRepoMap); } const response: StreamedSearchResponse = { From e2f9f643190de2bab245d57b3a182062a7c1d8c0 Mon Sep 17 00:00:00 2001 From: thomasleveil Date: Wed, 18 Mar 2026 09:46:26 +0100 Subject: [PATCH 10/15] fix(search): derive isSelectRepoMode from Lezer parser instead of raw regex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit searchResultsPage was using a hand-rolled regex to detect select:repo in the query. If the grammar evolves (new select values, quoting rules) the regex could silently drift. Export getSelectModeFromQuery() from parser.ts — it runs the same non-strict Lezer parser as the backend — and use it as the authoritative signal for isSelectRepoMode in searchResultsPage. Addresses CodeRabbit review comment on PR #1015. 
--- .../[domain]/search/components/searchResultsPage.tsx | 5 +++-- packages/web/src/features/search/parser.ts | 10 ++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx b/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx index 9734b2487..59569f356 100644 --- a/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx +++ b/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx @@ -12,6 +12,7 @@ import { import { Separator } from "@/components/ui/separator"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; import { RepoResult, RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search"; +import { getSelectModeFromQuery } from "@/features/search/parser"; import useCaptureEvent from "@/hooks/useCaptureEvent"; import { useDomain } from "@/hooks/useDomain"; import { useNonEmptyQueryParam } from "@/hooks/useNonEmptyQueryParam"; @@ -84,8 +85,8 @@ export const SearchResultsPage = ({ isCaseSensitivityEnabled, }); - // Detect if the query uses select:repo projection - const isSelectRepoMode = /(?:^|\s)select:repo(?:\s|$)/.test(searchQuery); + // Detect if the query uses select:repo projection — uses the same Lezer parser as the backend + const isSelectRepoMode = getSelectModeFromQuery(searchQuery) === 'repo'; useEffect(() => { if (error) { diff --git a/packages/web/src/features/search/parser.ts b/packages/web/src/features/search/parser.ts index 9e365854f..7f5b5a300 100644 --- a/packages/web/src/features/search/parser.ts +++ b/packages/web/src/features/search/parser.ts @@ -67,6 +67,16 @@ const findLinguistLanguage = (value: string): string => { export type SelectMode = 'repo' | null; +/** + * Parses a raw query string and returns the select: modifier value, if present. + * Uses the same Lezer parser as the backend so behaviour is identical. 
+ * Safe to call on the client — uses the non-strict parser variant. + */ +export const getSelectModeFromQuery = (query: string): SelectMode => { + const tree = _parser.parse(query); + return extractSelectMode(tree, query); +}; + /** * Extracts the select: modifier from a Lezer tree. * Returns 'repo' or null. From 120e23b816a37f3c11211927353f597d961a4d3a Mon Sep 17 00:00:00 2001 From: thomasleveil Date: Wed, 18 Mar 2026 09:46:39 +0100 Subject: [PATCH 11/15] fix(ui): quote repo names with spaces or special chars in navigateToRepo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit repo:my org/repo-name is invalid zoekt syntax — the space terminates the token. Wrap names that contain anything other than alphanumerics, hyphens, dots, underscores, or slashes in double-quotes and escape inner backslashes and double-quotes so the resulting filter is always a valid zoekt token. Addresses CodeRabbit review comment on PR #1015. --- .../app/[domain]/search/components/repoResultsPanel.tsx | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/web/src/app/[domain]/search/components/repoResultsPanel.tsx b/packages/web/src/app/[domain]/search/components/repoResultsPanel.tsx index 05d144e94..241398de2 100644 --- a/packages/web/src/app/[domain]/search/components/repoResultsPanel.tsx +++ b/packages/web/src/app/[domain]/search/components/repoResultsPanel.tsx @@ -16,11 +16,18 @@ export const RepoResultsPanel = ({ repoResults, searchQuery }: RepoResultsPanelP const router = useRouter(); const navigateToRepo = (repoName: string) => { + // Quote repo names that contain spaces or special characters so the + // zoekt repo: filter parses them as a single token. + const needsQuoting = /[^a-zA-Z0-9\-._/]/.test(repoName); + const safeRepo = needsQuoting + ? 
`"${repoName.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"` + : repoName; + // Replace select:repo with repo:xxx, preserving all other filters const newQuery = searchQuery .replace(/(?:^|\s)select:repo(?:\s|$)/g, ' ') .trim() - .concat(` repo:${repoName}`) + .concat(` repo:${safeRepo}`) .trim(); const path = createPathWithQueryParams( `/${domain}/search`, From 14812ecc7607b70023957925f5d1d30d19e51db0 Mon Sep 17 00:00:00 2001 From: thomasleveil Date: Wed, 18 Mar 2026 09:46:51 +0100 Subject: [PATCH 12/15] fix(mcp): wire filterByRepos and filterByFilepaths in search_repos handler The search_repos tool spread searchFilterParamsSchema (which includes filterByRepos and filterByFilepaths) but the handler destructured neither field, so both filters were silently dropped. Callers expecting to scope a search_repos call to specific repositories or file paths got unscoped results instead. Destructure and forward both fields to buildQueryFilters. Addresses CodeRabbit review comment on PR #1015. --- packages/mcp/src/index.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 3b05eef23..6b409a396 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -497,12 +497,14 @@ server.tool( async ({ query, filterByLanguages: languages = [], + filterByRepos: repos = [], + filterByFilepaths: filepaths = [], caseSensitive = false, ref, useRegex = false, maxResults = 50, }) => { - let fullQuery = buildQueryFilters({ query, filterByLanguages: languages, ref }); + let fullQuery = buildQueryFilters({ query, filterByLanguages: languages, filterByRepos: repos, filterByFilepaths: filepaths, ref }); if (!fullQuery.includes('select:repo')) { fullQuery += ' select:repo'; } From 6caa4fbdddf15eec174aabfc405dd931864a574d Mon Sep 17 00:00:00 2001 From: thomasleveil Date: Wed, 18 Mar 2026 09:46:59 +0100 Subject: [PATCH 13/15] fix(mcp): use word-boundary regex for select:repo guard in search_repos 
String.includes('select:repo') would match the substring inside a longer token such as 'myprefix-select:repo' and suppress the modifier incorrectly. Use a regex that requires whitespace or string boundaries on both sides, consistent with how zoekt tokenises query modifiers. Addresses CodeRabbit review comment on PR #1015. --- packages/mcp/src/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 6b409a396..3b3eaf022 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -505,7 +505,7 @@ server.tool( maxResults = 50, }) => { let fullQuery = buildQueryFilters({ query, filterByLanguages: languages, filterByRepos: repos, filterByFilepaths: filepaths, ref }); - if (!fullQuery.includes('select:repo')) { + if (!/(?:^|\s)select:repo(?:\s|$)/.test(fullQuery)) { fullQuery += ' select:repo'; } From 6205b5dad14e0f324cbf75f9ab4c0cb877622c6d Mon Sep 17 00:00:00 2001 From: thomasleveil Date: Wed, 18 Mar 2026 09:47:12 +0100 Subject: [PATCH 14/15] fix(ui): show repo-centric streaming status in select:repo mode While streaming, the status bar displayed 'Found N matches in M files' even when the query uses select:repo (where files is always empty). Branch on isSelectRepoMode to show 'Found N repositories' instead, consistent with the RepoResultsPanel that is rendered below. Addresses CodeRabbit review comment on PR #1015. --- .../[domain]/search/components/searchResultsPage.tsx | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx b/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx index 59569f356..73fcb046c 100644 --- a/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx +++ b/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx @@ -341,8 +341,14 @@ const PanelGroup = ({ <>

Searching...

- {numMatches > 0 && ( -

{`Found ${numMatches} matches in ${fileMatches.length} ${fileMatches.length > 1 ? 'files' : 'file'}`}

+ {isSelectRepoMode ? ( + (repoResults?.length ?? 0) > 0 && ( +

{`Found ${repoResults!.length} ${repoResults!.length === 1 ? 'repository' : 'repositories'}`}

+ ) + ) : ( + numMatches > 0 && ( +

{`Found ${numMatches} matches in ${fileMatches.length} ${fileMatches.length > 1 ? 'files' : 'file'}`}

+ ) )} ) : ( From 8779b74dcde40459b41d974b005d7850f49ccb73 Mon Sep 17 00:00:00 2001 From: thomasleveil Date: Wed, 18 Mar 2026 09:47:28 +0100 Subject: [PATCH 15/15] test(mcp): document intentional regex isolation in hasModifiers suite The hasModifiers regex was defined locally rather than imported from the implementation. Add a comment explaining this is deliberate: the suite pins the expected detection contract, not the implementation detail. Because the regex is a local copy, these tests do not exercise the implementation's regex, so the two must be kept in sync by hand. Addresses CodeRabbit nitpick on PR #1015. --- packages/mcp/src/__tests__/select-repo.test.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/mcp/src/__tests__/select-repo.test.ts b/packages/mcp/src/__tests__/select-repo.test.ts index 33ba941bb..5995bbb87 100644 --- a/packages/mcp/src/__tests__/select-repo.test.ts +++ b/packages/mcp/src/__tests__/select-repo.test.ts @@ -104,6 +104,11 @@ describe('searchResponseSchema with repoResults', () => { // ---- 2. hasModifiers transform logic ---------------------------------------- describe('search_code query transform — hasModifiers regex', () => { + // The regex is intentionally defined here rather than imported from the + // implementation. These tests pin the expected contract (which modifiers + // should and should not be detected) for this local copy only; they do + // not exercise the implementation's regex, so the two must be kept in + // sync by hand whenever the set of recognised modifiers changes. const RE = /(?:^|\s)(?:select|repo|lang|file|case|rev|branch|sym|content):/; it('detects select:repo modifier', () => assert.ok(RE.test('useState select:repo')));