diff --git a/CHANGELOG.md b/CHANGELOG.md index 339c50cc3..633523952 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added AGENTS.md with Cursor Cloud development environment instructions. [#1001](https://github.com/sourcebot-dev/sourcebot/pull/1001) - Added support for configuring SMTP via individual environment variables (SMTP_HOST, SMTP_PORT, SMTP_USERNAME, SMTP_PASSWORD) as an alternative to SMTP_CONNECTION_URL. [#1002](https://github.com/sourcebot-dev/sourcebot/pull/1002) +- Added `select:repo` query modifier that returns a deduplicated list of matching repositories sorted by match count, with a new `RepoResultsPanel` UI and a `search_repos` MCP tool. [#1015](https://github.com/sourcebot-dev/sourcebot/pull/1015) ## [4.15.6] - 2026-03-13 diff --git a/packages/mcp/package.json b/packages/mcp/package.json index bad139b54..0ed7bd84d 100644 --- a/packages/mcp/package.json +++ b/packages/mcp/package.json @@ -7,7 +7,8 @@ "scripts": { "build": "tsc", "dev": "node ./dist/index.js", - "build:watch": "tsc-watch --preserveWatchOutput" + "build:watch": "tsc-watch --preserveWatchOutput", + "test": "node --import tsx/esm --test src/__tests__/*.test.ts" }, "devDependencies": { "@types/express": "^5.0.1", diff --git a/packages/mcp/src/__tests__/select-repo.test.ts b/packages/mcp/src/__tests__/select-repo.test.ts new file mode 100644 index 000000000..5995bbb87 --- /dev/null +++ b/packages/mcp/src/__tests__/select-repo.test.ts @@ -0,0 +1,187 @@ +/** + * Tests for the select:repo feature in the MCP server. + * + * Covers: + * 1. repoResultSchema / searchResponseSchema validation + * 2. The hasModifiers transform fix in search_code + * 3. 
The search_repos tool end-to-end via InMemoryTransport + * + * Run with: + * node --import tsx/esm --test src/__tests__/select-repo.test.ts + */ + +import { describe, it, before, after } from 'node:test'; +import assert from 'node:assert/strict'; +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { InMemoryTransport } from '@modelcontextprotocol/sdk/inMemory.js'; +import { repoResultSchema, searchResponseSchema } from '../schemas.js'; + +// ---- helpers ---------------------------------------------------------------- + +function makeStats() { + return { + actualMatchCount: 0, totalMatchCount: 0, duration: 0, fileCount: 0, + filesSkipped: 0, contentBytesLoaded: 0, indexBytesLoaded: 0, crashes: 0, + shardFilesConsidered: 0, filesConsidered: 0, filesLoaded: 0, + shardsScanned: 0, shardsSkipped: 0, shardsSkippedFilter: 0, + ngramMatches: 0, ngramLookups: 0, wait: 0, + matchTreeConstruction: 0, matchTreeSearch: 0, + regexpsConsidered: 0, flushReason: 'none', + }; +} + +function makeSearchResponse(extra: Record = {}) { + return { stats: makeStats(), files: [], repositoryInfo: [], isSearchExhaustive: true, ...extra }; +} + +function mockFetch(payload: unknown) { + globalThis.fetch = async (_input: RequestInfo | URL, _init?: RequestInit) => + new Response(JSON.stringify(payload), { status: 200, headers: { 'Content-Type': 'application/json' } }); +} + +function captureFetch(payload: unknown, onCall: (body: Record) => void) { + globalThis.fetch = async (_input: RequestInfo | URL, init?: RequestInit) => { + onCall(JSON.parse((init?.body as string) ?? '{}')); + return new Response(JSON.stringify(payload), { status: 200, headers: { 'Content-Type': 'application/json' } }); + }; +} + +function getText(result: unknown): string { + return (result as { content: Array<{ type: string; text: string }> }).content + .map((c) => c.text).join('\n'); +} + +// ---- 1. 
Schema validation --------------------------------------------------- + +describe('repoResultSchema', () => { + it('parses a valid RepoResult', () => { + const r = repoResultSchema.safeParse({ repositoryId: 1, repository: 'github.com/acme/frontend', matchCount: 42 }); + assert.ok(r.success); + assert.equal(r.data.matchCount, 42); + }); + + it('parses a RepoResult with optional repositoryInfo', () => { + const r = repoResultSchema.safeParse({ + repositoryId: 2, repository: 'github.com/acme/backend', matchCount: 7, + repositoryInfo: { id: 2, codeHostType: 'github', name: 'acme/backend', webUrl: 'https://github.com/acme/backend' }, + }); + assert.ok(r.success); + assert.equal(r.data.repositoryInfo?.webUrl, 'https://github.com/acme/backend'); + }); + + it('rejects a RepoResult missing matchCount', () => { + const r = repoResultSchema.safeParse({ repositoryId: 1, repository: 'github.com/acme/x' }); + assert.ok(!r.success, 'should have failed'); + }); +}); + +describe('searchResponseSchema with repoResults', () => { + it('accepts a response without repoResults (backward compat)', () => { + const r = searchResponseSchema.safeParse(makeSearchResponse()); + assert.ok(r.success); + assert.equal(r.data.repoResults, undefined); + }); + + it('accepts a response with repoResults', () => { + const r = searchResponseSchema.safeParse(makeSearchResponse({ + repoResults: [ + { repositoryId: 1, repository: 'github.com/acme/a', matchCount: 10 }, + { repositoryId: 2, repository: 'github.com/acme/b', matchCount: 3 }, + ], + })); + assert.ok(r.success); + assert.equal(r.data.repoResults?.length, 2); + }); + + it('rejects repoResults with a missing required field', () => { + const r = searchResponseSchema.safeParse(makeSearchResponse({ + repoResults: [{ repositoryId: 1, repository: 'github.com/x' }], + })); + assert.ok(!r.success, 'should have failed'); + }); +}); + +// ---- 2. 
hasModifiers transform logic ---------------------------------------- + +describe('search_code query transform — hasModifiers regex', () => { + // The regex is deliberately duplicated here rather than imported from the + // implementation. These tests pin the expected contract (which modifiers + // should and should not be detected) but exercise only this local copy, so + // they cannot catch a regression in the source regex — keep the two in sync + // (or export the regex from the implementation and import it here). + const RE = /(?:^|\s)(?:select|repo|lang|file|case|rev|branch|sym|content):/; + + it('detects select:repo modifier', () => assert.ok(RE.test('useState select:repo'))); + it('detects lang: modifier', () => assert.ok(RE.test('function lang:TypeScript'))); + it('detects repo: at start', () => assert.ok(RE.test('repo:acme/frontend useState'))); + it('does not false-positive on plain text', () => { + assert.ok(!RE.test('useState hook')); + assert.ok(!RE.test('async function fetch')); + }); + it('does not match partial words (selector:hover)', () => assert.ok(!RE.test('selector:hover'))); +}); + +// ---- 3. 
search_repos tool (end-to-end) -------------------------------------- + +describe('search_repos tool', () => { + let client: Client; + let savedFetch: typeof globalThis.fetch; + + before(async () => { + savedFetch = globalThis.fetch; + process.env.SOURCEBOT_HOST = 'http://localhost:3000'; + process.env.SOURCEBOT_API_KEY = 'test-key'; + + // Dynamic import so env vars are set first + const { server } = await import('../index.js'); + const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair(); + await server.connect(serverTransport); + client = new Client({ name: 'test-client', version: '0.0.1' }); + await client.connect(clientTransport); + }); + + after(async () => { + await client?.close(); + globalThis.fetch = savedFetch; + }); + + it('returns repo list from API repoResults', async () => { + mockFetch(makeSearchResponse({ + repoResults: [ + { repositoryId: 1, repository: 'github.com/acme/frontend', matchCount: 20 }, + { repositoryId: 2, repository: 'github.com/acme/backend', matchCount: 5 }, + ], + })); + const text = getText(await client.callTool({ name: 'search_repos', arguments: { query: 'useState' } })); + assert.ok(text.includes('github.com/acme/frontend')); + assert.ok(text.includes('github.com/acme/backend')); + assert.ok(text.includes('matches: 20')); + }); + + it('returns no-results message when repoResults is empty', async () => { + mockFetch(makeSearchResponse({ repoResults: [] })); + const text = getText(await client.callTool({ name: 'search_repos', arguments: { query: 'nonExistentSymbol' } })); + assert.ok(text.toLowerCase().includes('no repositories')); + }); + + it('appends select:repo and lang: filters to the query', async () => { + let captured = ''; + captureFetch(makeSearchResponse({ repoResults: [] }), (body) => { captured = body.query as string; }); + await client.callTool({ name: 'search_repos', arguments: { query: 'useState', filterByLanguages: ['TypeScript', 'JavaScript'] } }); + 
assert.ok(captured.includes('lang:TypeScript'), `query: ${captured}`); + assert.ok(captured.includes('lang:JavaScript'), `query: ${captured}`); + assert.ok(captured.includes('select:repo'), `query: ${captured}`); + }); + + it('respects maxResults limit', async () => { + const repos = Array.from({ length: 10 }, (_, i) => ({ + repositoryId: i, repository: `github.com/acme/repo-${i}`, matchCount: 10 - i, + })); + mockFetch(makeSearchResponse({ repoResults: repos })); + const text = getText(await client.callTool({ name: 'search_repos', arguments: { query: 'test', maxResults: 3 } })); + assert.ok(text.includes('10 repositor'), `missing total: ${text}`); + assert.ok(text.includes('top 3'), `missing limit notice: ${text}`); + const lines = text.split('\n').filter((l: string) => l.startsWith('repo:')); + assert.equal(lines.length, 3); + }); +}); diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index aa8b27513..3b3eaf022 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -15,12 +15,79 @@ import { buildTreeNodeIndex, joinTreePath, normalizeTreePath, sortTreeEntries } const dedent = _dedent.withOptions({ alignValues: true }); // Create MCP server -const server = new McpServer({ +export const server = new McpServer({ name: 'sourcebot-mcp-server', version: '0.1.0', }); +// --------------------------------------------------------------------------- +// Shared query-building helpers +// --------------------------------------------------------------------------- + +/** + * Common filter parameters accepted by every search tool. + * Add new filter params here once and they become available to all tools. 
+ */ +const searchFilterParamsSchema = { + filterByLanguages: z + .array(z.string()) + .describe(`Scope the search to the provided languages.`) + .optional(), + filterByRepos: z + .array(z.string()) + .describe(`Scope the search to the provided repositories.`) + .optional(), + filterByFilepaths: z + .array(z.string()) + .describe(`Scope the search to the provided file paths.`) + .optional(), + ref: z + .string() + .describe(`Commit SHA, branch or tag name to search on. Defaults to the default branch.`) + .optional(), + caseSensitive: z + .boolean() + .describe(`Whether the search should be case sensitive (default: false).`) + .optional(), + useRegex: z + .boolean() + .describe(`Whether to use regular expression matching. When false, substring matching is used. (default: false)`) + .optional(), +}; + +/** + * Appends zoekt filter tokens (lang:, repo:, file:, rev:) to a base query. + */ +const buildQueryFilters = ({ + query, + filterByLanguages = [], + filterByRepos = [], + filterByFilepaths = [], + ref, +}: { + query: string; + filterByLanguages?: string[]; + filterByRepos?: string[]; + filterByFilepaths?: string[]; + ref?: string; +}): string => { + let q = query; + if (filterByRepos.length > 0) { + q += ` (repo:${filterByRepos.map(id => escapeStringRegexp(id)).join(' or repo:')})`; + } + if (filterByLanguages.length > 0) { + q += ` (lang:${filterByLanguages.join(' or lang:')})`; + } + if (filterByFilepaths.length > 0) { + q += ` (file:${filterByFilepaths.map(fp => escapeStringRegexp(fp)).join(' or file:')})`; + } + if (ref) { + q += ` (rev:${ref})`; + } + return q; +}; + server.tool( "search_code", dedent` @@ -36,34 +103,13 @@ server.tool( const escaped = val.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); return `"${escaped}"`; }), - useRegex: z - .boolean() - .describe(`Whether to use regular expression matching to match the search query against code contents. When false, substring matching is used. 
(default: false)`) - .optional(), - filterByRepos: z - .array(z.string()) - .describe(`Scope the search to the provided repositories.`) - .optional(), - filterByLanguages: z - .array(z.string()) - .describe(`Scope the search to the provided languages.`) - .optional(), - filterByFilepaths: z - .array(z.string()) - .describe(`Scope the search to the provided filepaths.`) - .optional(), - caseSensitive: z - .boolean() - .describe(`Whether the search should be case sensitive (default: false).`) - .optional(), + + ...searchFilterParamsSchema, includeCodeSnippets: z .boolean() .describe(`Whether to include the code snippets in the response. If false, only the file's URL, repository, and language will be returned. (default: false)`) .optional(), - ref: z - .string() - .describe(`Commit SHA, branch or tag name to search on. If not provided, defaults to the default branch (usually 'main' or 'master').`) - .optional(), + maxTokens: numberSchema .describe(`The maximum number of tokens to return (default: ${env.DEFAULT_MINIMUM_TOKENS}). Higher values provide more context but consume more tokens. Values less than ${env.DEFAULT_MINIMUM_TOKENS} will be ignored.`) .transform((val) => (val < env.DEFAULT_MINIMUM_TOKENS ? 
env.DEFAULT_MINIMUM_TOKENS : val)) @@ -80,21 +126,7 @@ server.tool( ref, useRegex = false, }) => { - if (repos.length > 0) { - query += ` (repo:${repos.map(id => escapeStringRegexp(id)).join(' or repo:')})`; - } - - if (languages.length > 0) { - query += ` (lang:${languages.join(' or lang:')})`; - } - - if (filepaths.length > 0) { - query += ` (file:${filepaths.map(filepath => escapeStringRegexp(filepath)).join(' or file:')})`; - } - - if (ref) { - query += ` ( rev:${ref} )`; - } + query = buildQueryFilters({ query, filterByRepos: repos, filterByLanguages: languages, filterByFilepaths: filepaths, ref }); const response = await search({ query, @@ -445,12 +477,84 @@ server.tool( } ); + +server.tool( + "search_repos", + `Searches code and returns the list of matching repositories (deduplicated), sorted by number of matches. Useful for answering "which repos use X?" questions. Equivalent to appending select:repo to a Sourcebot query.`, + { + query: z + .string() + .describe(`The search pattern to match against code contents. Supports plain text or regex if useRegex is true.`), + + ...searchFilterParamsSchema, + maxResults: z + .number() + .int() + .positive() + .describe(`Maximum number of repositories to return (default: 50).`) + .optional(), + }, + async ({ + query, + filterByLanguages: languages = [], + filterByRepos: repos = [], + filterByFilepaths: filepaths = [], + caseSensitive = false, + ref, + useRegex = false, + maxResults = 50, + }) => { + let fullQuery = buildQueryFilters({ query, filterByLanguages: languages, filterByRepos: repos, filterByFilepaths: filepaths, ref }); + if (!/(?:^|\s)select:repo(?:\s|$)/.test(fullQuery)) { + fullQuery += ' select:repo'; + } + + const response = await search({ + query: fullQuery, + matches: env.DEFAULT_MATCHES, + contextLines: 0, + isRegexEnabled: useRegex, + isCaseSensitivityEnabled: caseSensitive, + }); + + const repoResults = response.repoResults ?? 
[]; // named repoResults because "repos" is already bound by the destructured filterByRepos parameter + + if (repoResults.length === 0) { + return { + content: [{ + type: "text", + text: `No repositories found matching: ${query}`, + }], + }; + } + + const limited = repoResults.slice(0, maxResults); + const lines = limited.map(r => + `repo: ${r.repository} matches: ${r.matchCount}${r.repositoryInfo?.webUrl ? ` url: ${r.repositoryInfo.webUrl}` : ''}` + ); + + const text = [ + `Found ${repoResults.length} repositor${repoResults.length === 1 ? 'y' : 'ies'} matching "${query}"${limited.length < repoResults.length ? ` (showing top ${maxResults})` : ''}:`, + '', + ...lines, + ].join('\n'); + + return { + content: [{ type: "text", text }], + }; + } +); + const runServer = async () => { const transport = new StdioServerTransport(); await server.connect(transport); } -runServer().catch((error) => { - console.error('Failed to start MCP server:', error); - process.exit(1); -}); +// Only auto-start when run directly (not when imported in tests). Compares the entry script's basename with this module's URL; splits on both "/" and "\" so Windows argv paths match. NOTE(review): a symlinked launcher whose name differs from the module file would not match — confirm how the server is launched. +const isMain = process.argv[1] && import.meta.url.endsWith(process.argv[1].replace(/\.ts$/, '.js').split(/[\\/]/).pop() ?? 
''); +if (isMain) { + runServer().catch((error) => { + console.error('Failed to start MCP server:', error); + process.exit(1); + }); +} diff --git a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index a72fbf116..98f1eb7fc 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -114,8 +114,16 @@ export const searchStatsSchema = z.object({ flushReason: z.string(), }); +export const repoResultSchema = z.object({ + repositoryId: z.number(), + repository: z.string(), + repositoryInfo: repositoryInfoSchema.optional(), + matchCount: z.number(), +}); + export const searchResponseSchema = z.object({ stats: searchStatsSchema, + repoResults: z.array(repoResultSchema).optional(), files: z.array(z.object({ fileName: z.object({ // The name of the file diff --git a/packages/mcp/src/types.ts b/packages/mcp/src/types.ts index 63c050856..0352b30dd 100644 --- a/packages/mcp/src/types.ts +++ b/packages/mcp/src/types.ts @@ -3,6 +3,7 @@ import { fileSourceResponseSchema, listReposQueryParamsSchema, + repoResultSchema, locationSchema, searchRequestSchema, searchResponseSchema, @@ -24,6 +25,7 @@ import { } from "./schemas.js"; import { z } from "zod"; +export type RepoResult = z.infer; export type SearchRequest = z.infer; export type SearchResponse = z.infer; export type SearchResultRange = z.infer; diff --git a/packages/queryLanguage/src/query.grammar b/packages/queryLanguage/src/query.grammar index 5cff3c685..0372bedd7 100644 --- a/packages/queryLanguage/src/query.grammar +++ b/packages/queryLanguage/src/query.grammar @@ -47,7 +47,8 @@ PrefixExpr { RepoExpr | LangExpr | SymExpr | - RepoSetExpr + RepoSetExpr | + SelectExpr } RevisionExpr { revisionKw value } @@ -59,6 +60,8 @@ LangExpr { langKw value } SymExpr { symKw value } RepoSetExpr { reposetKw value } +SelectExpr { selectKw value } + // Modifiers ArchivedExpr { archivedKw archivedValue } ForkExpr { forkKw forkValue } @@ -87,6 +90,7 @@ value { quotedString | word } langKw { "lang:" } symKw { "sym:" } 
reposetKw { "reposet:" } + selectKw { "select:" } // 'or' is now handled by external orToken tokenizer @@ -100,6 +104,6 @@ value { quotedString | word } quotedString, archivedKw, revisionKw, contentKw, contextKw, fileKw, forkKw, visibilityKw, repoKw, langKw, - symKw, reposetKw + symKw, reposetKw, selectKw } } \ No newline at end of file diff --git a/packages/queryLanguage/src/tokens.ts b/packages/queryLanguage/src/tokens.ts index 4fbd656c1..124942f1f 100644 --- a/packages/queryLanguage/src/tokens.ts +++ b/packages/queryLanguage/src/tokens.ts @@ -25,6 +25,7 @@ const PREFIXES = [ "lang:", "sym:", "reposet:", + "select:", ]; function isWhitespace(ch: number): boolean { diff --git a/packages/web/src/app/[domain]/components/searchBar/constants.ts b/packages/web/src/app/[domain]/components/searchBar/constants.ts index ea93cee87..3dd7b73f3 100644 --- a/packages/web/src/app/[domain]/components/searchBar/constants.ts +++ b/packages/web/src/app/[domain]/components/searchBar/constants.ts @@ -19,6 +19,7 @@ export enum SearchPrefix { fork = "fork:", visibility = "visibility:", context = "context:", + select = "select:", } export const visibilityModeSuggestions: Suggestion[] = [ @@ -66,3 +67,10 @@ export const archivedModeSuggestions: Suggestion[] = [ } ]; + +export const selectModeSuggestions: Suggestion[] = [ + { + value: "repo", + description: "Return the list of repositories containing matches instead of individual file results.", + }, +]; diff --git a/packages/web/src/app/[domain]/components/searchBar/searchSuggestionsBox.tsx b/packages/web/src/app/[domain]/components/searchBar/searchSuggestionsBox.tsx index c5bebb640..eac48554c 100644 --- a/packages/web/src/app/[domain]/components/searchBar/searchSuggestionsBox.tsx +++ b/packages/web/src/app/[domain]/components/searchBar/searchSuggestionsBox.tsx @@ -8,6 +8,7 @@ import { forwardRef, Ref, useEffect, useMemo, useState } from "react"; import { archivedModeSuggestions, forkModeSuggestions, + selectModeSuggestions, 
visibilityModeSuggestions, } from "./constants"; import { IconType } from "react-icons/lib"; @@ -39,7 +40,8 @@ export type SuggestionMode = "content" | "repo" | "searchHistory" | - "context"; + "context" | + "select"; interface SearchSuggestionsBoxProps { className?: string; @@ -206,6 +208,12 @@ const SearchSuggestionsBox = forwardRef(({ descriptionPlacement: "left", DefaultIcon: VscFilter, } + case "select": + return { + list: selectModeSuggestions, + onSuggestionClicked: createOnSuggestionClickedHandler({ trailingSpace: true }), + descriptionPlacement: "left", + } case "none": case "revision": case "content": @@ -299,6 +307,8 @@ const SearchSuggestionsBox = forwardRef(({ return "Search history" case "context": return "Search contexts" + case "select": + return "Select mode" default: return ""; } diff --git a/packages/web/src/app/[domain]/components/searchBar/useRefineModeSuggestions.ts b/packages/web/src/app/[domain]/components/searchBar/useRefineModeSuggestions.ts index 1c8df2a89..e7c60220f 100644 --- a/packages/web/src/app/[domain]/components/searchBar/useRefineModeSuggestions.ts +++ b/packages/web/src/app/[domain]/components/searchBar/useRefineModeSuggestions.ts @@ -25,6 +25,11 @@ export const useRefineModeSuggestions = () => { description: "Exclude results from the given search context." }, ] : []), + { + value: SearchPrefix.select, + description: "Project results — return repositories instead of files.", + spotlight: true, + }, { value: SearchPrefix.visibility, description: "Filter on repository visibility." 
diff --git a/packages/web/src/app/[domain]/components/searchBar/useSuggestionModeMappings.ts b/packages/web/src/app/[domain]/components/searchBar/useSuggestionModeMappings.ts index 64e4acfb9..e0c2f665e 100644 --- a/packages/web/src/app/[domain]/components/searchBar/useSuggestionModeMappings.ts +++ b/packages/web/src/app/[domain]/components/searchBar/useSuggestionModeMappings.ts @@ -91,6 +91,12 @@ export const useSuggestionModeMappings = () => { ] } satisfies SuggestionModeMapping, ] : []), + { + suggestionMode: "select", + prefixes: [ + SearchPrefix.select, + ], + }, ] }, [isSearchContextsEnabled]); diff --git a/packages/web/src/app/[domain]/components/searchBar/zoektLanguageExtension.ts b/packages/web/src/app/[domain]/components/searchBar/zoektLanguageExtension.ts index 6d86a710d..d140bb1a0 100644 --- a/packages/web/src/app/[domain]/components/searchBar/zoektLanguageExtension.ts +++ b/packages/web/src/app/[domain]/components/searchBar/zoektLanguageExtension.ts @@ -47,7 +47,7 @@ export const zoekt = () => { // Check for prefixes first // If these match, we return 'keyword' - if (stream.match(/(archived:|rev:|content:|f:|file:|fork:|visibility:|r:|repo:|regex:|lang:|sym:|t:|type:|context:)/)) { + if (stream.match(/(archived:|rev:|content:|f:|file:|fork:|visibility:|r:|repo:|regex:|lang:|sym:|t:|type:|context:|select:)/)) { return t.keyword.toString(); } diff --git a/packages/web/src/app/[domain]/search/components/repoResultsPanel.tsx b/packages/web/src/app/[domain]/search/components/repoResultsPanel.tsx new file mode 100644 index 000000000..241398de2 --- /dev/null +++ b/packages/web/src/app/[domain]/search/components/repoResultsPanel.tsx @@ -0,0 +1,75 @@ +'use client'; + +import { RepoResult, RepositoryInfo } from "@/features/search"; +import { useDomain } from "@/hooks/useDomain"; +import { createPathWithQueryParams } from "@/lib/utils"; +import { useRouter } from "next/navigation"; +import { SearchQueryParams } from "@/lib/types"; + +interface 
RepoResultsPanelProps { + repoResults: RepoResult[]; + searchQuery: string; +} + +export const RepoResultsPanel = ({ repoResults, searchQuery }: RepoResultsPanelProps) => { + const domain = useDomain(); + const router = useRouter(); + + const navigateToRepo = (repoName: string) => { + // Quote repo names that contain spaces or special characters so the + // zoekt repo: filter parses them as a single token. + const needsQuoting = /[^a-zA-Z0-9\-._/]/.test(repoName); + const safeRepo = needsQuoting + ? `"${repoName.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"` + : repoName; + + // Replace select:repo with repo:xxx, preserving all other filters + const newQuery = searchQuery + .replace(/(?:^|\s)select:repo(?:\s|$)/g, ' ') + .trim() + .concat(` repo:${safeRepo}`) + .trim(); + const path = createPathWithQueryParams( + `/${domain}/search`, + [SearchQueryParams.query, newQuery], + ); + router.push(path); + }; + + if (repoResults.length === 0) { + return ( +
+ No repositories found +
+ ); + } + + return ( +
+
+ {repoResults.length} {repoResults.length === 1 ? "repository" : "repositories"} matched +
+ {repoResults.map((repo) => ( + + ))} +
+ ); +}; diff --git a/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx b/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx index a8a7c9139..73fcb046c 100644 --- a/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx +++ b/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx @@ -11,7 +11,8 @@ import { } from "@/components/ui/resizable"; import { Separator } from "@/components/ui/separator"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; -import { RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search"; +import { RepoResult, RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search"; +import { getSelectModeFromQuery } from "@/features/search/parser"; import useCaptureEvent from "@/hooks/useCaptureEvent"; import { useDomain } from "@/hooks/useDomain"; import { useNonEmptyQueryParam } from "@/hooks/useNonEmptyQueryParam"; @@ -33,6 +34,7 @@ import { CodePreviewPanel } from "./codePreviewPanel"; import { FilterPanel } from "./filterPanel"; import { useFilteredMatches } from "./filterPanel/useFilterMatches"; import { SearchResultsPanel, SearchResultsPanelHandle } from "./searchResultsPanel"; +import { RepoResultsPanel } from "./repoResultsPanel"; import { ServiceErrorException } from "@/lib/serviceError"; import { Session } from "next-auth"; @@ -67,6 +69,7 @@ export const SearchResultsPage = ({ error, files, repoInfo, + repoResults, timeToSearchCompletionMs, timeToFirstSearchResultMs, isStreaming, @@ -82,6 +85,9 @@ export const SearchResultsPage = ({ isCaseSensitivityEnabled, }); + // Detect if the query uses select:repo projection — uses the same Lezer parser as the backend + const isSelectRepoMode = getSelectModeFromQuery(searchQuery) === 'repo'; + useEffect(() => { if (error) { toast({ @@ -208,6 +214,9 @@ export const SearchResultsPage = ({ searchStats={stats} isMoreResultsButtonVisible={!isExhaustive} 
isBranchFilteringEnabled={isBranchFilteringEnabled} + isSelectRepoMode={isSelectRepoMode} + repoResults={repoResults} + searchQuery={searchQuery} /> )} @@ -224,6 +233,9 @@ interface PanelGroupProps { searchDurationMs: number; numMatches: number; searchStats?: SearchStats; + isSelectRepoMode: boolean; + repoResults: RepoResult[] | undefined; + searchQuery: string; } const PanelGroup = ({ @@ -236,6 +248,9 @@ const PanelGroup = ({ searchDurationMs: _searchDurationMs, numMatches, searchStats, + isSelectRepoMode, + repoResults, + searchQuery, }: PanelGroupProps) => { const [previewedFile, setPreviewedFile] = useState(undefined); const filteredFileMatches = useFilteredMatches(fileMatches); @@ -326,8 +341,14 @@ const PanelGroup = ({ <>

Searching...

- {numMatches > 0 && ( -

{`Found ${numMatches} matches in ${fileMatches.length} ${fileMatches.length > 1 ? 'files' : 'file'}`}

+ {isSelectRepoMode ? ( + (repoResults?.length ?? 0) > 0 && ( +

{`Found ${repoResults!.length} ${repoResults!.length === 1 ? 'repository' : 'repositories'}`}

+ ) + ) : ( + numMatches > 0 && ( +

{`Found ${numMatches} matches in ${fileMatches.length} ${fileMatches.length > 1 ? 'files' : 'file'}`}

+ ) )} ) : ( @@ -354,7 +375,11 @@ const PanelGroup = ({ { - fileMatches.length > 0 ? ( + isSelectRepoMode ? ( + repoResults && repoResults.length > 0 ? ( +

{`[${searchDurationMs} ms] Found ${repoResults.length} ${repoResults.length === 1 ? 'repository' : 'repositories'}`}

+ ) : null + ) : fileMatches.length > 0 ? (

{`[${searchDurationMs} ms] Found ${numMatches} matches in ${fileMatches.length} ${fileMatches.length > 1 ? 'files' : 'file'}`}

) : (

No results

@@ -372,7 +397,20 @@ const PanelGroup = ({ )}
- {filteredFileMatches.length > 0 ? ( + {isSelectRepoMode ? ( + (repoResults && repoResults.length > 0) ? ( + + ) : isStreaming ? ( +
+ +

Searching...

+
+ ) : ( +
+

No results found

+
+ ) + ) : filteredFileMatches.length > 0 ? ( ; + repoResults: RepoResult[]; numMatches: number; timeToSearchCompletionMs: number; timeToFirstSearchResultMs: number; @@ -41,6 +42,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex error: Error | null, files: SearchResultFile[], repoInfo: Record, + repoResults: RepoResult[], timeToSearchCompletionMs: number, timeToFirstSearchResultMs: number, numMatches: number, @@ -51,6 +53,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex error: null, files: [], repoInfo: {}, + repoResults: [], timeToSearchCompletionMs: 0, timeToFirstSearchResultMs: 0, numMatches: 0, @@ -98,6 +101,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex error: null, files: cachedEntry.files, repoInfo: cachedEntry.repoInfo, + repoResults: cachedEntry.repoResults ?? [], timeToSearchCompletionMs: cachedEntry.timeToSearchCompletionMs, timeToFirstSearchResultMs: cachedEntry.timeToFirstSearchResultMs, numMatches: cachedEntry.numMatches, @@ -111,6 +115,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex error: null, files: [], repoInfo: {}, + repoResults: [], timeToSearchCompletionMs: 0, timeToFirstSearchResultMs: 0, numMatches: 0, @@ -200,6 +205,10 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex ...prev.files, ...response.files ], + repoResults: response.repoResults ? [ + ...prev.repoResults, + ...response.repoResults + ] : prev.repoResults, repoInfo: { ...prev.repoInfo, ...response.repositoryInfo.reduce((acc, repo) => { @@ -218,6 +227,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex ...prev, isExhaustive: response.isSearchExhaustive, stats: response.accumulatedStats, + ...(response.repoResults ? { repoResults: response.repoResults } : {}), ...(isFirstMessage ? 
/**
 * Projection requested through a `select:` query modifier.
 * `null` means the query carries no recognised `select:` modifier.
 */
export type SelectMode = 'repo' | null;

/**
 * Reads the `select:` modifier out of a raw query string.
 *
 * The string is parsed with `_parser` and the resulting tree is handed to
 * `extractSelectMode`, the same helper `parseQuerySyntaxIntoIR` uses, so both
 * entry points interpret the modifier the same way.
 *
 * @param query - The raw search query.
 * @returns `'repo'` when the query contains `select:repo`, otherwise `null`.
 */
export const getSelectModeFromQuery = (query: string): SelectMode =>
    extractSelectMode(_parser.parse(query), query);

/**
 * Walks every node of a Lezer tree looking for a `SelectExpr` whose value is `repo`.
 *
 * @param tree - Parse tree produced from `input`.
 * @param input - The query text the tree was parsed from; node offsets index into it.
 * @returns `'repo'` if any `SelectExpr` node reads `select:repo`, otherwise `null`.
 */
const extractSelectMode = (tree: Tree, input: string): SelectMode => {
    const walker = tree.cursor();

    // The cursor starts on the root node, so inspect it before the first `next()`.
    for (let hasNode = true; hasNode; hasNode = walker.next()) {
        if (walker.name !== 'SelectExpr') {
            continue;
        }

        const nodeText = input.substring(walker.from, walker.to);
        const separatorAt = nodeText.indexOf(':');
        if (separatorAt === -1) {
            continue;
        }

        // 'repo' is the only supported value, so the first hit settles the answer.
        if (nodeText.slice(separatorAt + 1).trim() === 'repo') {
            return 'repo';
        }
    }

    return null;
};
false, @@ -110,6 +148,8 @@ export const parseQuerySyntaxIntoIR = async ({ return context.repos.map((repo) => repo.name); }, }); + + return { ir, selectMode }; } catch (error) { if (error instanceof SyntaxError) { throw new ServiceErrorException({ @@ -415,6 +455,11 @@ const transformTreeToIR = async ({ query: "repo_set" }; } + case SelectExpr: { + // select: is a projection modifier — no-op match-all, optimized away by zoekt + return { const: true, query: "const" }; + } + default: throw new Error(`Unknown prefix type: ${prefixNode.type.name} (id: ${prefixTypeId})`); } diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts index b21f05c07..d60a00957 100644 --- a/packages/web/src/features/search/searchApi.ts +++ b/packages/web/src/features/search/searchApi.ts @@ -6,9 +6,9 @@ import { PrismaClient, UserWithAccounts } from "@sourcebot/db"; import { env, hasEntitlement } from "@sourcebot/shared"; import { headers } from "next/headers"; import { QueryIR } from './ir'; -import { parseQuerySyntaxIntoIR } from './parser'; -import { SearchOptions } from "./types"; -import { createZoektSearchRequest, zoektSearch, zoektStreamSearch } from './zoektSearcher'; +import { parseQuerySyntaxIntoIR, SelectMode } from './parser'; +import { SearchOptions, SearchResponse, RepoResult } from "./types"; +import { accumulateRepoMap, createZoektSearchRequest, zoektSearch, zoektStreamSearch } from './zoektSearcher'; type QueryStringSearchRequest = { @@ -21,7 +21,6 @@ type QueryStringSearchRequest = { type QueryIRSearchRequest = { queryType: 'ir'; query: QueryIR; - // Omit options that are specific to query syntax parsing. options: Omit; source?: string; } @@ -43,12 +42,16 @@ export const search = (request: SearchRequest) => sew(() => const repoSearchScope = await getAccessibleRepoNamesForUser({ user, prisma }); - // If needed, parse the query syntax into the query intermediate representation. - const query = request.queryType === 'string' ? 
await parseQuerySyntaxIntoIR({ - query: request.query, - options: request.options, - prisma, - }) : request.query; + let selectMode: SelectMode = null; + const query = request.queryType === 'string' ? await (async () => { + const { ir, selectMode: mode } = await parseQuerySyntaxIntoIR({ + query: request.query, + options: request.options, + prisma, + }); + selectMode = mode; + return ir; + })() : request.query; const zoektSearchRequest = await createZoektSearchRequest({ query, @@ -56,7 +59,11 @@ export const search = (request: SearchRequest) => sew(() => repoSearchScope, }); - return zoektSearch(zoektSearchRequest, prisma); + const result = await zoektSearch(zoektSearchRequest, prisma); + if (selectMode === 'repo') { + return applySelectRepo(result); + } + return result; })); export const streamSearch = (request: SearchRequest) => sew(() => @@ -74,12 +81,16 @@ export const streamSearch = (request: SearchRequest) => sew(() => const repoSearchScope = await getAccessibleRepoNamesForUser({ user, prisma }); - // If needed, parse the query syntax into the query intermediate representation. - const query = request.queryType === 'string' ? await parseQuerySyntaxIntoIR({ - query: request.query, - options: request.options, - prisma, - }) : request.query; + let selectMode: SelectMode = null; + const query = request.queryType === 'string' ? await (async () => { + const { ir, selectMode: mode } = await parseQuerySyntaxIntoIR({ + query: request.query, + options: request.options, + prisma, + }); + selectMode = mode; + return ir; + })() : request.query; const zoektSearchRequest = await createZoektSearchRequest({ query, @@ -87,13 +98,9 @@ export const streamSearch = (request: SearchRequest) => sew(() => repoSearchScope, }); - return zoektStreamSearch(zoektSearchRequest, prisma); + return zoektStreamSearch(zoektSearchRequest, prisma, selectMode); })); -/** - * Returns a list of repository names that the user has access to. - * If permission syncing is disabled, returns undefined. 
/**
 * Turns a regular search response into its `select:repo` projection.
 *
 * File results are folded into one entry per repository via
 * `accumulateRepoMap`, then ordered by descending match count. The returned
 * response keeps every other field of `result` but carries an empty `files`
 * list.
 *
 * @param result - The complete (non-streamed) search response.
 * @returns A copy of `result` with `repoResults` populated and `files` emptied.
 */
const applySelectRepo = (result: SearchResponse): SearchResponse => {
    // Keyed by repository id. Explicit type arguments keep the comparator and
    // the `repoResults` payload checked instead of collapsing to `any`.
    const repoMap = new Map<number, RepoResult>();
    accumulateRepoMap(result.files, result.repositoryInfo, repoMap);

    // Spread into a fresh array so sorting never touches the map's own iteration state.
    const repoResults = [...repoMap.values()].sort((a, b) => b.matchCount - a.matchCount);

    return {
        ...result,
        repoResults,
        files: [],
    };
};
@@ -104,6 +112,7 @@ export const searchResponseSchema = z.object({ files: z.array(searchFileSchema), repositoryInfo: z.array(repositoryInfoSchema), isSearchExhaustive: z.boolean(), + repoResults: z.array(repoResultSchema).optional(), }); export type SearchResponse = z.infer; @@ -115,6 +124,7 @@ export const streamedSearchChunkResponseSchema = z.object({ stats: searchStatsSchema, files: z.array(searchFileSchema), repositoryInfo: z.array(repositoryInfoSchema), + repoResults: z.array(repoResultSchema).optional(), }); export type StreamedSearchChunkResponse = z.infer; @@ -125,6 +135,7 @@ export const streamedSearchFinalResponseSchema = z.object({ type: z.literal('final'), accumulatedStats: searchStatsSchema, isSearchExhaustive: z.boolean(), + repoResults: z.array(repoResultSchema).optional(), }); export type StreamedSearchFinalResponse = z.infer; diff --git a/packages/web/src/features/search/zoektSearcher.ts b/packages/web/src/features/search/zoektSearcher.ts index 7203521ba..387434077 100644 --- a/packages/web/src/features/search/zoektSearcher.ts +++ b/packages/web/src/features/search/zoektSearcher.ts @@ -16,7 +16,7 @@ import { PrismaClient, Repo } from "@sourcebot/db"; import { createLogger, env } from "@sourcebot/shared"; import path from 'path'; import { isBranchQuery, QueryIR, someInQueryIR } from './ir'; -import { RepositoryInfo, SearchResponse, SearchResultFile, SearchStats, SourceRange, StreamedSearchErrorResponse, StreamedSearchResponse } from "./types"; +import { RepositoryInfo, RepoResult, SearchResponse, SearchResultFile, SearchStats, SourceRange, StreamedSearchErrorResponse, StreamedSearchResponse } from "./types"; import { captureEvent } from "@/lib/posthog"; import { getBrowsePath } from "@/app/[domain]/browse/hooks/utils"; import { SINGLE_TENANT_ORG_DOMAIN } from "@/lib/constants"; @@ -148,7 +148,29 @@ export const zoektSearch = async (searchRequest: ZoektGrpcSearchRequest, prisma: }); } -export const zoektStreamSearch = async (searchRequest: 
/**
 * Folds file results into a per-repository match-count map.
 *
 * Used by both the streaming path (`zoektStreamSearch`, once per chunk against
 * a long-lived map) and the non-streaming path (`applySelectRepo`), so the two
 * aggregate identically.
 *
 * @param files - File results to fold in. A file's match count is the total
 *   number of `matchRanges` across its chunks.
 * @param repositoryInfo - Repository metadata accompanying `files`; attached to
 *   the matching entry by id when available.
 * @param repoMap - Accumulator keyed by repository id. Mutated in place: new
 *   repositories are inserted, known ones have their `matchCount` increased.
 */
export const accumulateRepoMap = (
    files: SearchResultFile[],
    repositoryInfo: RepositoryInfo[],
    repoMap: Map<number, RepoResult>,
): void => {
    // Index the metadata once instead of scanning the array per repository.
    // The first entry for an id wins, matching what `Array.prototype.find` would return.
    const infoById = new Map<number, RepositoryInfo>();
    for (const info of repositoryInfo) {
        if (!infoById.has(info.id)) {
            infoById.set(info.id, info);
        }
    }

    for (const file of files) {
        const repoId = file.repositoryId;
        const matchCount = file.chunks.reduce((total, chunk) => total + chunk.matchRanges.length, 0);

        const existing = repoMap.get(repoId);
        if (existing === undefined) {
            repoMap.set(repoId, {
                repositoryId: repoId,
                repository: file.repository,
                repositoryInfo: infoById.get(repoId),
                matchCount,
            });
            continue;
        }

        existing.matchCount += matchCount;
        // The map can outlive a single call (streaming), so fill in metadata
        // that was not available when the entry was first created.
        if (existing.repositoryInfo === undefined) {
            existing.repositoryInfo = infoById.get(repoId);
        }
    }
};
{ + repoResults: Array.from(_accumulatedRepoMap.values()).sort((a, b) => b.matchCount - a.matchCount) + } : {}), } controller.enqueue(encodeSSEREsponseChunk(finalResponse)); @@ -234,11 +261,16 @@ export const zoektStreamSearch = async (searchRequest: ZoektGrpcSearchRequest, p accumulatedStats = accumulateStats(accumulatedStats, stats); + // Accumulate repo map for select:repo mode + if (selectMode === 'repo') { + accumulateRepoMap(files, repositoryInfo, _accumulatedRepoMap); + } + const response: StreamedSearchResponse = { type: 'chunk', - files, + files: selectMode === 'repo' ? [] : files, repositoryInfo, - stats + stats, } controller.enqueue(encodeSSEREsponseChunk(response));