diff --git a/packages/vector-search/README.md b/packages/vector-search/README.md new file mode 100644 index 00000000..b831fc00 --- /dev/null +++ b/packages/vector-search/README.md @@ -0,0 +1,377 @@ +# @databricks/appkit-vector-search + +Appkit plugin that adds Databricks Vector Search to your app — backend routes, React hook, and UI components in one package. + +## Quick Start + +**Backend** (`app.ts`): + +```typescript +import { createApp } from '@databricks/appkit'; +import { VectorSearchPlugin } from '@databricks/appkit-vector-search'; + +createApp({ + plugins: [ + new VectorSearchPlugin({ + indexes: { + products: { + indexName: 'catalog.schema.product_index', + columns: ['id', 'name', 'description', 'price', 'category'], + }, + }, + }), + ], +}); +``` + +**Frontend** (`ProductSearch.tsx`) — the hook and components are published under the `/ui` subpath export; the root entry point only exports the backend plugin and types: + +```tsx +import { useVectorSearch, SearchBox, SearchResults } from '@databricks/appkit-vector-search/ui'; + +function ProductSearch() { + const vs = useVectorSearch<{ id: string; name: string; description: string; price: number; category: string }>('products'); + + return ( +
+ + +
+ ); +} +``` + +That's it — hybrid search with debouncing, loading states, keyword highlighting, and error handling. + +## Installation + +```bash +npm install @databricks/appkit-vector-search +``` + +Peer dependencies: `react ^18.x`, `@databricks/appkit ^0.x`. + +## Backend Setup + +Register the plugin with `createApp`. Each key in `indexes` is an **alias** used by the frontend hook and API routes. + +```typescript +new VectorSearchPlugin({ + indexes: { + products: { + indexName: 'catalog.schema.product_index', // required — three-level UC name + columns: ['id', 'name', 'description'], // required — columns to return + queryType: 'hybrid', // 'ann' | 'hybrid' | 'full_text' (default: 'hybrid') + numResults: 20, // max results per query (default: 20) + reranker: false, // enable Databricks reranker (default: false) + auth: 'service-principal', // 'service-principal' | 'on-behalf-of-user' (default: 'service-principal') + cache: { enabled: false }, // see Caching section + pagination: false, // see Pagination section + endpointName: 'my-endpoint', // required when pagination: true + embeddingFn: undefined, // see Self-Managed Embeddings section + }, + }, +}) +``` + +### IndexConfig Reference + +| Property | Type | Default | Description | +|----------|------|---------|-------------| +| `indexName` | `string` | *required* | Three-level UC name (`catalog.schema.index`) | +| `columns` | `string[]` | *required* | Columns to return in results | +| `queryType` | `'ann' \| 'hybrid' \| 'full_text'` | `'hybrid'` | Default search mode | +| `numResults` | `number` | `20` | Max results per query | +| `reranker` | `boolean \| { columnsToRerank: string[] }` | `false` | Enable built-in reranker | +| `auth` | `'service-principal' \| 'on-behalf-of-user'` | `'service-principal'` | Auth mode | +| `cache` | `CacheConfig` | `undefined` | Optional result caching | +| `pagination` | `boolean` | `false` | Enable cursor pagination | +| `endpointName` | `string` | `undefined` | VS endpoint name 
(required if `pagination: true`) | +| `embeddingFn` | `(text: string) => Promise<number[]>` | `undefined` | Custom embedding function for self-managed indexes | + +## Frontend + +### `useVectorSearch` Hook + +```typescript +const vs = useVectorSearch('products', { + debounceMs: 300, // debounce delay (default: 300) + numResults: 10, // override server default + queryType: 'ann', // override server default + reranker: true, // override server default + minQueryLength: 2, // minimum chars before searching (default: 1) + initialFilters: { category: 'electronics' }, + onResults: (response) => console.log(response), + onError: (error) => console.error(error), +}); +``` + +**Returns:** + +| Property | Type | Description | +|----------|------|-------------| +| `search` | `(query: string) => void` | Execute a search (debounced) | +| `results` | `SearchResult<T>[]` | Current results (each has `.score` and `.data`) | +| `isLoading` | `boolean` | Whether a search is in flight | +| `error` | `SearchError \| null` | Error from last search | +| `query` | `string` | Current query text | +| `totalCount` | `number` | Total result count | +| `queryTimeMs` | `number` | Query execution time in ms | +| `fromCache` | `boolean` | Whether results came from cache | +| `setFilters` | `(filters) => void` | Set filters and re-execute search | +| `activeFilters` | `SearchFilters` | Current active filters | +| `clear` | `() => void` | Clear query, results, and filters | +| `hasMore` | `boolean` | More results available (pagination) | +| `loadMore` | `() => void` | Fetch next page, append to results | +| `isLoadingMore` | `boolean` | Whether loadMore is in flight | + +The hook handles debouncing, request cancellation (AbortController), filter reactivity, and cleanup on unmount. + +### Components + +#### `<SearchBox>` + +```tsx +<SearchBox + onSearch={vs.search} + value={vs.query} + placeholder="Search..." + isLoading={vs.isLoading} + autoFocus +/> +``` + +Includes search icon, clear button (appears when input has value), Escape key to clear, and loading spinner. 
+ +#### `<SearchResults>` + +```tsx + ...} // fully custom result rendering (overrides default card) + className="mt-4" +/> +``` + +States: loading skeleton (3 animated cards), error banner, empty message, results with count + timing. + +#### `<SearchResultCard>` + +Used internally by `SearchResults`, but can be used standalone: + +```tsx +<SearchResultCard + result={result} + titleColumn="name" + descriptionColumn="description" + displayColumns={['price', 'category']} + showScore + query={vs.query} +/> +``` + +#### `<SearchLoadMore>` + +```tsx +<SearchLoadMore + hasMore={vs.hasMore} + isLoading={vs.isLoadingMore} + onLoadMore={vs.loadMore} +/> +``` + +### Filters + +Use `setFilters` from the hook to apply VS filter syntax: + +```typescript +// IN list +vs.setFilters({ category: ['electronics', 'books'] }); + +// Comparison operators +vs.setFilters({ 'price >=': 10, 'price <=': 100 }); + +// NOT +vs.setFilters({ 'title NOT': 'test' }); + +// LIKE +vs.setFilters({ 'name LIKE': 'data%' }); + +// OR across columns +vs.setFilters({ 'color1 OR color2': ['red', 'blue'] }); +``` + +Calling `setFilters` immediately re-executes the current search with the new filters. + +## Auth + +### Service Principal (default) + +The plugin uses `DATABRICKS_CLIENT_ID` and `DATABRICKS_CLIENT_SECRET` from the environment. When deployed to Databricks Apps, these are set automatically. OAuth tokens are cached and refreshed with a 2-minute buffer before expiry. + +No configuration needed — this is the default. + +### On-Behalf-of-User + +For indexes with row-level security or Unity Catalog permissions: + +```typescript +indexes: { + docs: { + indexName: 'catalog.schema.docs_index', + columns: ['id', 'title', 'content'], + auth: 'on-behalf-of-user', // uses the logged-in user's token + }, +} +``` + +The plugin extracts the user's OAuth token from the `x-forwarded-access-token` header (set by Databricks Apps proxy). Queries run with the user's identity and UC permissions. 
+ +## Self-Managed Embeddings + +For indexes that don't use Databricks-managed embeddings, provide an `embeddingFn` that converts query text to a vector: + +```typescript +indexes: { + custom: { + indexName: 'catalog.schema.custom_index', + columns: ['id', 'title', 'content'], + queryType: 'ann', + embeddingFn: async (text) => { + const resp = await fetch( + `https://${process.env.DATABRICKS_HOST}/serving-endpoints/my-embedding-model/invocations`, + { + method: 'POST', + headers: { 'Authorization': `Bearer ${token}`, 'Content-Type': 'application/json' }, + body: JSON.stringify({ input: [text] }), + }, + ); + const data = await resp.json(); + return data.data[0].embedding; + }, + }, +} +``` + +When `embeddingFn` is set, the plugin calls it to convert `queryText` into `queryVector` before sending to VS. The frontend hook works identically — users type text, the backend handles the conversion. + +If omitted, the plugin sends `queryText` directly and VS computes embeddings server-side (managed mode). + +## Caching + +Optional LRU cache for search results. Off by default (freeform search has low cache hit rates). + +```typescript +indexes: { + products: { + indexName: 'catalog.schema.product_index', + columns: ['id', 'name', 'description'], + cache: { + enabled: true, + ttlSeconds: 120, // time-to-live per entry (default: 60) + maxEntries: 1000, // max cached queries (default: 1000) + }, + }, +} +``` + +Cached responses include `fromCache: true` in the response. The hook exposes this via `vs.fromCache`. + +## Pagination + +Cursor-based pagination for large result sets. Off by default — VS typically returns results in 20-40ms, so most apps don't need it. 
+ +```typescript +indexes: { + products: { + indexName: 'catalog.schema.product_index', + columns: ['id', 'name', 'description'], + pagination: true, + endpointName: 'my-vs-endpoint', // required when pagination is enabled + }, +} +``` + +Frontend usage: + +```tsx +const vs = useVectorSearch('products'); + +return ( + <> + + + + +); +``` + +`loadMore` fetches the next page and appends results to the existing array. + +## API Reference + +The plugin registers these Express routes automatically: + +| Method | Path | Body | Description | +|--------|------|------|-------------| +| `POST` | `/api/vector-search/:alias/query` | `SearchRequest` | Execute a search | +| `POST` | `/api/vector-search/:alias/next-page` | `{ pageToken: string }` | Fetch next page (requires `pagination: true`) | +| `GET` | `/api/vector-search/:alias/config` | — | Returns index config (columns, queryType, numResults, etc.) | + +### SearchRequest Body + +```json +{ + "queryText": "wireless headphones", + "filters": { "category": ["electronics"] }, + "numResults": 10, + "queryType": "hybrid", + "reranker": true +} +``` + +### SearchResponse + +```json +{ + "results": [ + { "score": 0.92, "data": { "id": "1", "name": "...", "description": "..." } } + ], + "totalCount": 47, + "queryTimeMs": 35, + "queryType": "hybrid", + "fromCache": false, + "nextPageToken": null +} +``` + +### Error Response + +```json +{ + "code": "INVALID_QUERY", + "message": "queryText or queryVector is required", + "statusCode": 400 +} +``` + +Error codes: `UNAUTHORIZED`, `INDEX_NOT_FOUND`, `INVALID_QUERY`, `RATE_LIMITED`, `INTERNAL`. 
diff --git a/packages/vector-search/package.json b/packages/vector-search/package.json new file mode 100644 index 00000000..efdeb804 --- /dev/null +++ b/packages/vector-search/package.json @@ -0,0 +1,47 @@ +{ + "name": "@databricks/appkit-vector-search", + "version": "0.1.0", + "description": "Vector Search plugin for Databricks Appkit", + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "import": "./dist/index.js", + "types": "./dist/index.d.ts" + }, + "./ui": { + "import": "./dist/ui/index.js", + "types": "./dist/ui/index.d.ts" + } + }, + "scripts": { + "build": "tsc", + "dev": "tsc --watch", + "test": "vitest run", + "test:watch": "vitest" + }, + "peerDependencies": { + "@databricks/appkit": "^0.1.0", + "react": "^18.0.0" + }, + "dependencies": { + "@radix-ui/react-icons": "^1.3.0", + "@radix-ui/react-select": "^2.1.0", + "@radix-ui/react-slider": "^1.2.0", + "express": "^5.2.1" + }, + "devDependencies": { + "@testing-library/jest-dom": "^6.4.0", + "@testing-library/react": "^15.0.0", + "@types/express": "^4.17.0", + "@types/react": "^18.3.0", + "@types/react-dom": "^19.2.3", + "@types/supertest": "^7.2.0", + "jsdom": "^24.0.0", + "react": "^19.2.4", + "react-dom": "^19.2.4", + "supertest": "^7.2.2", + "typescript": "^5.4.0", + "vitest": "^1.6.0" + } +} diff --git a/packages/vector-search/src/index.ts b/packages/vector-search/src/index.ts new file mode 100644 index 00000000..dbe386e9 --- /dev/null +++ b/packages/vector-search/src/index.ts @@ -0,0 +1,13 @@ +export { VectorSearchPlugin } from './plugin/VectorSearchPlugin'; +export { createVectorSearchRouter } from './plugin/routes'; +export type { + VectorSearchPluginConfig, + IndexConfig, + RerankerConfig, + CacheConfig, + SearchRequest, + SearchResponse, + SearchResult, + SearchFilters, + SearchError, +} from './plugin/types'; diff --git a/packages/vector-search/src/plugin/VectorSearchClient.ts b/packages/vector-search/src/plugin/VectorSearchClient.ts new file mode 100644 index 
00000000..d80ceb86 --- /dev/null +++ b/packages/vector-search/src/plugin/VectorSearchClient.ts @@ -0,0 +1,200 @@
import type { SearchResponse, SearchFilters, SearchError, RerankerConfig, TokenProvider, VsRawResponse } from './types';

/**
 * Thin HTTP client for the Databricks Vector Search REST API.
 *
 * All failures are thrown as plain `SearchError`-shaped objects
 * (`{ code, message, statusCode }`) so route handlers can forward them as JSON.
 */
export class VectorSearchClient {
  private host: string;
  private tokenProvider: TokenProvider;

  /**
   * @param config.host Workspace hostname without scheme (used as `https://${host}`).
   * @param config.tokenProvider Source of the bearer token used when no user token is supplied.
   */
  constructor(config: { host: string; tokenProvider: TokenProvider }) {
    this.host = config.host;
    this.tokenProvider = config.tokenProvider;
  }

  /**
   * Runs a query against one index.
   *
   * When `embeddingFn` is given and only `queryText` is present, the text is converted
   * to a vector first and sent as `query_vector` instead of `query_text`.
   *
   * @throws SearchError `INVALID_QUERY` when neither text nor vector is available,
   *   or the mapped upstream error when the API responds with a non-2xx status.
   */
  async query(params: {
    indexName: string;
    queryText?: string;
    queryVector?: number[];
    columns: string[];
    numResults: number;
    queryType: 'ann' | 'hybrid' | 'full_text';
    filters?: SearchFilters;
    reranker?: boolean | RerankerConfig;
    userToken?: string;
    embeddingFn?: (text: string) => Promise<number[]>;
  }): Promise<SearchResponse> {
    const token = params.userToken ?? (await this.tokenProvider.getToken());

    // Resolve query: managed (query_text) vs self-managed (query_vector)
    let queryText = params.queryText;
    let queryVector = params.queryVector;

    if (params.embeddingFn && queryText && !queryVector) {
      queryVector = await params.embeddingFn(queryText);
      queryText = undefined;
    }

    if (!queryText && !queryVector) {
      const error: SearchError = {
        code: 'INVALID_QUERY',
        message: 'Either queryText or queryVector is required',
        statusCode: 400,
      };
      throw error;
    }

    const body: Record<string, unknown> = {
      columns: params.columns,
      num_results: params.numResults,
      query_type: params.queryType.toUpperCase(),
      // debug_level 1 makes the API include debug_info, which parseResponse reads for timing.
      debug_level: 1,
    };

    if (queryText) body.query_text = queryText;
    if (queryVector) body.query_vector = queryVector;

    if (params.filters && Object.keys(params.filters).length > 0) {
      body.filters = params.filters;
    }

    if (params.reranker) {
      // `true` means "rerank on every requested column except the id column".
      const columnsToRerank =
        typeof params.reranker === 'object'
          ? params.reranker.columnsToRerank
          : params.columns.filter((c) => c !== 'id');
      body.reranker = {
        model: 'databricks_reranker',
        parameters: { columns_to_rerank: columnsToRerank },
      };
    }

    const response = await this.fetchWithRetry(this.indexUrl(params.indexName, 'query'), {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(body),
    });

    if (!response.ok) throw this.mapError(response);
    const raw = (await response.json()) as VsRawResponse;
    return this.parseResponse(raw, params.queryType);
  }

  /**
   * Fetches the next page of a previous query.
   *
   * @param params.queryType Query type of the originating search, echoed back in the
   *   response. Optional for backward compatibility; defaults to `'hybrid'`.
   * @throws SearchError the mapped upstream error when the API responds with a non-2xx status.
   */
  async queryNextPage(params: {
    indexName: string;
    endpointName: string;
    pageToken: string;
    userToken?: string;
    queryType?: 'ann' | 'hybrid' | 'full_text';
  }): Promise<SearchResponse> {
    const token = params.userToken ?? (await this.tokenProvider.getToken());

    const response = await this.fetchWithRetry(this.indexUrl(params.indexName, 'query-next-page'), {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        endpoint_name: params.endpointName,
        page_token: params.pageToken,
      }),
    });

    if (!response.ok) throw this.mapError(response);
    const raw = (await response.json()) as VsRawResponse;
    return this.parseResponse(raw, params.queryType ?? 'hybrid');
  }

  /** Builds the REST URL for an index action; the index name is encoded so it cannot alter the path. */
  private indexUrl(indexName: string, action: 'query' | 'query-next-page'): string {
    return `https://${this.host}/api/2.0/vector-search/indexes/${encodeURIComponent(indexName)}/${action}`;
  }

  /** Converts the column-oriented API payload into `{ score, data }` rows. */
  private parseResponse(raw: VsRawResponse, queryType: 'ann' | 'hybrid' | 'full_text'): SearchResponse {
    const columnNames = raw.manifest.columns.map((c) => c.name);
    const scoreIndex = columnNames.indexOf('score');
    // Defensive: treat a missing data_array as "no rows" instead of crashing on .map().
    const rows = raw.result.data_array ?? [];

    const results = rows.map((row) => {
      const data: Record<string, unknown> = {};
      columnNames.forEach((name, i) => {
        if (name !== 'score') data[name] = row[i];
      });
      return {
        score: scoreIndex >= 0 ? (row[scoreIndex] as number) : 0,
        data,
      };
    });

    return {
      results,
      totalCount: raw.result.row_count,
      queryTimeMs: raw.debug_info?.response_time ?? raw.debug_info?.latency_ms ?? 0,
      queryType,
      fromCache: false,
      nextPageToken: raw.next_page_token ?? null,
    };
  }

  /** Maps an upstream HTTP status onto the plugin's public error codes. */
  private mapError(response: { status: number }): SearchError {
    const codeMap: Record<number, SearchError['code']> = {
      401: 'UNAUTHORIZED',
      403: 'UNAUTHORIZED',
      404: 'INDEX_NOT_FOUND',
      400: 'INVALID_QUERY',
      429: 'RATE_LIMITED',
    };
    return {
      code: codeMap[response.status] ?? 'INTERNAL',
      message: `VS query failed with status ${response.status}`,
      statusCode: response.status,
    };
  }

  /**
   * `fetch` with retries for transient failures (network errors, 429, 5xx).
   *
   * The delay doubles after every attempt (`backoffMs`, 2x, 4x, ...). The previous
   * implementation waited a constant 1 ms, which hammers an already rate-limited
   * or failing endpoint.
   *
   * @returns The first non-retryable response, or the last response once retries are exhausted.
   * @throws SearchError `INTERNAL` when every attempt fails at the network level.
   */
  private async fetchWithRetry(
    url: string,
    options: RequestInit,
    maxRetries = 3,
    backoffMs = 250,
  ): Promise<Response> {
    for (let attempt = 0; ; attempt++) {
      const isLastAttempt = attempt >= maxRetries;
      try {
        const response = await fetch(url, options);
        const retryable = response.status === 429 || response.status >= 500;
        if (!retryable || isLastAttempt) return response;
      } catch (err) {
        if (isLastAttempt) {
          const reason = err instanceof Error ? err.message : String(err);
          const error: SearchError = {
            code: 'INTERNAL',
            message: `Network error: ${reason}`,
            statusCode: 500,
          };
          throw error;
        }
      }
      await new Promise((resolve) => setTimeout(resolve, backoffMs * 2 ** attempt));
    }
  }
}
diff --git a/packages/vector-search/src/plugin/VectorSearchPlugin.ts b/packages/vector-search/src/plugin/VectorSearchPlugin.ts new file mode 100644 index 00000000..97d4de92 --- /dev/null +++ b/packages/vector-search/src/plugin/VectorSearchPlugin.ts @@ -0,0 +1,105 @@
import type { VectorSearchPluginConfig, IndexConfig, SearchRequest, SearchResponse } from './types';
import { VectorSearchClient } from './VectorSearchClient';
import {
ServicePrincipalTokenProvider, OboTokenExtractor } from './auth';

/**
 * Appkit plugin that exposes configured Vector Search indexes under short aliases.
 * Call `setup()` before using `getClient()` or the `query` export.
 */
export class VectorSearchPlugin {
  static manifest = {
    name: 'vector-search',
    description: 'Query Databricks Vector Search indexes from your app',
    resources: { required: [] as any[], optional: [] as any[] },
    env: [
      { name: 'DATABRICKS_HOST', description: 'Databricks workspace hostname', source: 'auto' },
      { name: 'DATABRICKS_CLIENT_ID', description: 'Service principal client ID', source: 'auto' },
      { name: 'DATABRICKS_CLIENT_SECRET', description: 'Service principal client secret', source: 'auto' },
    ],
  };

  private config: VectorSearchPluginConfig;
  private client!: VectorSearchClient;
  private spTokenProvider!: ServicePrincipalTokenProvider;

  constructor(config: VectorSearchPluginConfig) {
    this.config = config;
  }

  /**
   * Validates the index configuration and creates the API client.
   * @throws Error when DATABRICKS_HOST is unset or an index config is incomplete.
   */
  async setup(): Promise<void> {
    const host = process.env.DATABRICKS_HOST;
    if (!host) {
      throw new Error(
        'DATABRICKS_HOST is not set. Ensure the app is deployed to Databricks Apps or set the environment variable manually.',
      );
    }

    // Fail-fast config validation
    for (const [alias, idx] of Object.entries(this.config.indexes)) {
      if (!idx.indexName) {
        throw new Error(`Index "${alias}" is missing required field "indexName"`);
      }
      if (!idx.columns || idx.columns.length === 0) {
        throw new Error(`Index "${alias}" is missing required field "columns"`);
      }
      if (idx.pagination && !idx.endpointName) {
        throw new Error(`Index "${alias}" has pagination enabled but is missing "endpointName"`);
      }
    }

    this.spTokenProvider = new ServicePrincipalTokenProvider(host);
    this.client = new VectorSearchClient({ host, tokenProvider: this.spTokenProvider });
  }

  async shutdown(): Promise<void> {
    // No cleanup needed currently
  }

  /** One SELECT requirement per configured index. */
  getResourceRequirements() {
    return Object.values(this.config.indexes).map((idx) => ({
      type: 'vector-search-index' as const,
      name: idx.indexName,
      permission: 'SELECT' as const,
    }));
  }

  /**
   * Programmatic API for other server-side code.
   * `userToken` is optional and only used for indexes configured with
   * `auth: 'on-behalf-of-user'`, where it is required.
   */
  exports() {
    return {
      query: (alias: string, request: SearchRequest, userToken?: string) =>
        this.executeQuery(alias, request, userToken),
    };
  }

  /** Resolve an index alias to its config. Throws if not found. */
  resolveIndex(alias: string): IndexConfig {
    // Own-property check: the alias comes from the URL, so inherited keys such as
    // "toString" or "constructor" must not resolve to Object.prototype members.
    const isConfigured = Object.prototype.hasOwnProperty.call(this.config.indexes, alias);
    const config = isConfigured ? this.config.indexes[alias] : undefined;
    if (!config) {
      throw {
        code: 'INDEX_NOT_FOUND' as const,
        message: `No index configured with alias "${alias}"`,
        statusCode: 404,
      };
    }
    return config;
  }

  /** Get the VS client instance (used by route handlers) */
  getClient(): VectorSearchClient {
    return this.client;
  }

  /** Get the full plugin config (used by route handlers) */
  getConfig(): VectorSearchPluginConfig {
    return this.config;
  }

  /**
   * Runs a query with request values overriding the index defaults.
   * @throws `UNAUTHORIZED` when the index is on-behalf-of-user and no user token is given.
   */
  private async executeQuery(alias: string, request: SearchRequest, userToken?: string): Promise<SearchResponse> {
    const indexConfig = this.resolveIndex(alias);
    const onBehalfOfUser = indexConfig.auth === 'on-behalf-of-user';

    // Never fall back to the service principal for an on-behalf-of-user index:
    // that would run the query without the caller's own permissions.
    if (onBehalfOfUser && !userToken) {
      throw {
        code: 'UNAUTHORIZED' as const,
        message: `Index "${alias}" requires a user token (auth: 'on-behalf-of-user')`,
        statusCode: 401,
      };
    }

    return this.client.query({
      indexName: indexConfig.indexName,
      queryText: request.queryText,
      queryVector: request.queryVector,
      columns: request.columns ?? indexConfig.columns,
      numResults: request.numResults ?? indexConfig.numResults ?? 20,
      queryType: request.queryType ?? indexConfig.queryType ?? 'hybrid',
      filters: request.filters,
      reranker: request.reranker ?? indexConfig.reranker ?? false,
      userToken: onBehalfOfUser ? userToken : undefined,
      embeddingFn: indexConfig.embeddingFn,
    });
  }
}
diff --git a/packages/vector-search/src/plugin/auth.ts b/packages/vector-search/src/plugin/auth.ts new file mode 100644 index 00000000..62211a8e --- /dev/null +++ b/packages/vector-search/src/plugin/auth.ts @@ -0,0 +1,48 @@
import type { TokenProvider, SearchError } from './types';

/**
 * Obtains and caches a service-principal token via the OAuth client-credentials grant,
 * using DATABRICKS_CLIENT_ID and DATABRICKS_CLIENT_SECRET from the environment.
 */
export class ServicePrincipalTokenProvider implements TokenProvider {
  private token: string | null = null;
  private expiresAt = 0;
  private host: string;

  constructor(host: string) {
    this.host = host;
  }

  /**
   * Returns the cached token, refreshing it when it is within 2 minutes of expiry.
   * @throws SearchError when credentials are missing, the token endpoint responds with
   *   a non-2xx status, or the response carries no `access_token`.
   */
  async getToken(): Promise<string> {
    if (this.token && Date.now() < this.expiresAt - 120_000) {
      return this.token;
    }

    const clientId = process.env.DATABRICKS_CLIENT_ID;
    const clientSecret = process.env.DATABRICKS_CLIENT_SECRET;
    if (!clientId || !clientSecret) {
      const error: SearchError = {
        code: 'INTERNAL',
        message: 'DATABRICKS_CLIENT_ID and DATABRICKS_CLIENT_SECRET must be set for service-principal auth',
        statusCode: 500,
      };
      throw error;
    }

    const response = await fetch(`https://${this.host}/oidc/v1/token`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
      body: new URLSearchParams({
        grant_type: 'client_credentials',
        client_id: clientId,
        client_secret: clientSecret,
        scope: 'all-apis',
      }).toString(),
    });

    if (!response.ok) {
      const rejected = response.status === 401 || response.status === 403;
      const error: SearchError = {
        code: rejected ? 'UNAUTHORIZED' : 'INTERNAL',
        message: `Token request failed with status ${response.status}`,
        statusCode: response.status,
      };
      throw error;
    }

    const data = (await response.json()) as { access_token?: string; expires_in?: number };
    if (!data.access_token) {
      const error: SearchError = {
        code: 'INTERNAL',
        message: 'Token response did not contain an access_token',
        statusCode: 500,
      };
      throw error;
    }

    this.token = data.access_token;
    // A missing expires_in means "do not cache": the next call fetches a fresh token.
    this.expiresAt = Date.now() + (data.expires_in ?? 0) * 1000;
    return data.access_token;
  }
}

export class OboTokenExtractor {
  // NOTE(review): the header value type was lost in extraction; `string | undefined` is the
  // narrowest type consistent with the `string` return — confirm against the original.
  static extractFromRequest(req: { headers: Record<string, string | undefined> }): string {
    const token = req.headers['x-forwarded-access-token'];
    if (!token) {
      const error: SearchError = {
        code: 'UNAUTHORIZED',
        message: 'No user token found. 
Ensure app is configured for user authorization.',
        statusCode: 401,
      };
      throw error;
    }
    return token;
  }
}
diff --git a/packages/vector-search/src/plugin/routes.ts b/packages/vector-search/src/plugin/routes.ts new file mode 100644 index 00000000..cd58e354 --- /dev/null +++ b/packages/vector-search/src/plugin/routes.ts @@ -0,0 +1,136 @@
import { Router } from 'express';
import type { Request, Response } from 'express';
import type { VectorSearchPlugin } from './VectorSearchPlugin';
import { OboTokenExtractor } from './auth';
import type { IndexConfig, SearchError, SearchRequest } from './types';

/**
 * Sends the documented `{ code, message, statusCode }` error body.
 *
 * SearchError-shaped values are forwarded unchanged. Anything else (for example a
 * native Error thrown by an `embeddingFn`) becomes a generic INTERNAL error instead
 * of serialising to `{}` and leaking nothing useful — or internal details — to the client.
 */
function sendError(res: Response, err: unknown, fallbackStatus = 500): void {
  const candidate = (typeof err === 'object' && err !== null ? err : {}) as Partial<SearchError>;
  const isSearchError = typeof candidate.code === 'string' && typeof candidate.message === 'string';
  const statusCode = typeof candidate.statusCode === 'number' ? candidate.statusCode : fallbackStatus;

  res.status(statusCode).json({
    code: isSearchError ? candidate.code : 'INTERNAL',
    message: isSearchError ? candidate.message : 'Unexpected error while processing the request',
    statusCode,
  });
}

/** Resolves the alias to its index config; on failure responds (404 by default) and returns undefined. */
function lookupIndex(plugin: VectorSearchPlugin, alias: string, res: Response): IndexConfig | undefined {
  try {
    return plugin.resolveIndex(alias);
  } catch (err) {
    sendError(res, err, 404);
    return undefined;
  }
}

/**
 * Extracts the user token for on-behalf-of-user indexes.
 * Returns `{}` for service-principal indexes; on failure responds with 401 and returns undefined.
 */
function lookupUserToken(indexConfig: IndexConfig, req: Request, res: Response): { userToken?: string } | undefined {
  if (indexConfig.auth !== 'on-behalf-of-user') return {};
  try {
    return { userToken: OboTokenExtractor.extractFromRequest(req) };
  } catch (err) {
    sendError(res, err, 401);
    return undefined;
  }
}

/**
 * Builds the Express router for the plugin:
 * `POST /:alias/query`, `POST /:alias/next-page` and `GET /:alias/config`.
 *
 * The POST routes read a JSON body from `req.body`; a missing body is handled as an
 * empty request rather than a crash.
 */
export function createVectorSearchRouter(plugin: VectorSearchPlugin): Router {
  const router = Router();

  // POST /:alias/query
  router.post('/:alias/query', async (req: Request, res: Response) => {
    const indexConfig = lookupIndex(plugin, req.params.alias, res);
    if (!indexConfig) return;

    // req.body is undefined when no body-parsing middleware ran.
    const body: SearchRequest = req.body ?? {};
    if (!body.queryText && !body.queryVector) {
      sendError(res, {
        code: 'INVALID_QUERY',
        message: 'queryText or queryVector is required',
        statusCode: 400,
      });
      return;
    }

    const auth = lookupUserToken(indexConfig, req, res);
    if (!auth) return;

    try {
      const response = await plugin.getClient().query({
        indexName: indexConfig.indexName,
        queryText: body.queryText,
        queryVector: body.queryVector,
        columns: body.columns ?? indexConfig.columns,
        numResults: body.numResults ?? indexConfig.numResults ?? 20,
        queryType: body.queryType ?? indexConfig.queryType ?? 'hybrid',
        filters: body.filters,
        reranker: body.reranker ?? indexConfig.reranker ?? false,
        userToken: auth.userToken,
        embeddingFn: indexConfig.embeddingFn,
      });
      res.json(response);
    } catch (err) {
      sendError(res, err);
    }
  });

  // POST /:alias/next-page
  router.post('/:alias/next-page', async (req: Request, res: Response) => {
    const { alias } = req.params;
    const indexConfig = lookupIndex(plugin, alias, res);
    if (!indexConfig) return;

    if (!indexConfig.pagination) {
      sendError(res, {
        code: 'INVALID_QUERY',
        message: `Pagination is not enabled for index "${alias}"`,
        statusCode: 400,
      });
      return;
    }

    const pageToken: unknown = req.body?.pageToken;
    if (typeof pageToken !== 'string' || pageToken.length === 0) {
      sendError(res, {
        code: 'INVALID_QUERY',
        message: 'pageToken is required',
        statusCode: 400,
      });
      return;
    }

    // The plugin's setup() validation rejects this combination, but guard here as well
    // rather than asserting non-null.
    if (!indexConfig.endpointName) {
      sendError(res, {
        code: 'INTERNAL',
        message: `Index "${alias}" has pagination enabled but is missing "endpointName"`,
        statusCode: 500,
      });
      return;
    }

    const auth = lookupUserToken(indexConfig, req, res);
    if (!auth) return;

    try {
      const response = await plugin.getClient().queryNextPage({
        indexName: indexConfig.indexName,
        endpointName: indexConfig.endpointName,
        pageToken,
        userToken: auth.userToken,
      });
      res.json(response);
    } catch (err) {
      sendError(res, err);
    }
  });

  // GET /:alias/config
  router.get('/:alias/config', (req: Request, res: Response) => {
    const { alias } = req.params;
    const indexConfig = lookupIndex(plugin, alias, res);
    if (!indexConfig) return;

    res.json({
      alias,
      columns: indexConfig.columns,
      queryType: indexConfig.queryType ?? 'hybrid',
      numResults: indexConfig.numResults ?? 20,
      reranker: !!indexConfig.reranker,
      pagination: !!indexConfig.pagination,
    });
  });

  return router;
}
diff --git a/packages/vector-search/src/plugin/types.ts b/packages/vector-search/src/plugin/types.ts new file mode 100644 index 00000000..f8ad1e84 --- /dev/null +++ b/packages/vector-search/src/plugin/types.ts @@ -0,0 +1,198 @@
// ============================================
// Plugin Configuration Types
// ============================================

export interface VectorSearchPluginConfig {
  /** Index configs keyed by the alias used in the API routes. */
  indexes: Record<string, IndexConfig>;
}

export interface IndexConfig {
  /** Three-level UC name: catalog.schema.index_name */
  indexName: string;
  /** Columns to return in results */
  columns: string[];
  /** Default search mode */
  queryType?: 'ann' | 'hybrid' | 'full_text'; // default: 'hybrid'
  /** Max results per query */
  numResults?: number; // default: 20
  /** Enable built-in reranker */
  reranker?: boolean | RerankerConfig; // default: false
  /** Auth mode */
  auth?: 'service-principal' | 'on-behalf-of-user'; // default: 'service-principal'
  /** Result caching */
  cache?: CacheConfig;
  /** Enable cursor pagination */
  pagination?: boolean; // default: false
  /** VS endpoint name (required if pagination: true) */
  endpointName?: string;
  /**
   * For self-managed embedding indexes: converts query text to embedding vector.
   * If provided, the plugin calls this function and sends query_vector to VS.
   * If omitted, the plugin sends query_text and VS computes embeddings (managed mode).
   */
  embeddingFn?: (text: string) => Promise<number[]>;
}

export interface RerankerConfig {
  columnsToRerank: string[];
}

export interface CacheConfig {
  enabled: boolean;
  ttlSeconds?: number; // default: 60
  maxEntries?: number; // default: 1000
}

// ============================================
// Query Types (frontend → backend)
// ============================================

export interface SearchRequest {
  /** Text query. 
Required for managed embedding indexes. */
  queryText?: string;
  /** Pre-computed embedding vector. Required for self-managed indexes without embeddingFn. */
  queryVector?: number[];
  /** Override default columns for this query */
  columns?: string[];
  /** Override default numResults for this query */
  numResults?: number;
  /** Override default queryType for this query */
  queryType?: 'ann' | 'hybrid' | 'full_text';
  /** Metadata filters */
  filters?: SearchFilters;
  /** Override reranker for this query */
  reranker?: boolean;
}

/**
 * Filters use the VS REST API filter format.
 * Keys are column names with optional operators.
 *
 * Examples:
 *   { category: ['electronics', 'books'] }       // IN list
 *   { 'price >=': 10 }                           // comparison
 *   { 'title NOT': 'test' }                      // NOT
 *   { 'name LIKE': 'data%' }                     // LIKE
 *   { 'color1 OR color2': ['red', 'blue'] }      // OR across columns
 *
 * NOTE(review): the value type was lost in extraction; this union covers every
 * example above — confirm against the original declaration.
 */
export type SearchFilters = Record<string, string | number | boolean | (string | number)[]>;

// ============================================
// Result Types (backend → frontend)
// ============================================

export interface SearchResponse<T extends Record<string, unknown> = Record<string, unknown>> {
  /** Search results */
  results: SearchResult<T>[];
  /** Total number of results */
  totalCount: number;
  /** Query execution time in ms (from VS debug info) */
  queryTimeMs: number;
  /** The query type that was actually used */
  queryType: 'ann' | 'hybrid' | 'full_text';
  /** Whether results were served from cache */
  fromCache: boolean;
  /** Token for fetching next page. Null if no more results. */
  nextPageToken: string | null;
}

export interface SearchResult<T extends Record<string, unknown> = Record<string, unknown>> {
  /** Similarity score (0-1, higher = more similar) */
  score: number;
  /** The result data — keys match the columns requested */
  data: T;
}

// ============================================
// Error Types
// ============================================

export interface SearchError {
  code: 'UNAUTHORIZED' | 'INDEX_NOT_FOUND' | 'INVALID_QUERY' | 'RATE_LIMITED' | 'INTERNAL';
  message: string;
  /** HTTP status from VS API */
  statusCode: number;
}

// ============================================
// Hook Types
// ============================================

export interface UseVectorSearchOptions {
  /** Debounce delay in ms. Default: 300 */
  debounceMs?: number;
  /** Override default numResults from server config */
  numResults?: number;
  /** Override default queryType from server config */
  queryType?: 'ann' | 'hybrid' | 'full_text';
  /** Override reranker from server config */
  reranker?: boolean;
  /** Initial filters */
  initialFilters?: SearchFilters;
  /** Callback when search completes */
  onResults?: (response: SearchResponse) => void;
  /** Callback on error */
  onError?: (error: SearchError) => void;
  /** Minimum query length before searching. Default: 1 */
  minQueryLength?: number;
}

export interface UseVectorSearchReturn<T extends Record<string, unknown> = Record<string, unknown>> {
  /** Execute a search */
  search: (query: string) => void;
  /** Current results */
  results: SearchResult<T>[];
  /** Whether a search is in flight */
  isLoading: boolean;
  /** Error from the last search, if any */
  error: SearchError | null;
  /** Total result count */
  totalCount: number;
  /** Query time in ms */
  queryTimeMs: number;
  /** Whether results came from cache */
  fromCache: boolean;
  /** Current query text */
  query: string;
  /** Set filters programmatically */
  setFilters: (filters: SearchFilters) => void;
  /** Current active filters */
  activeFilters: SearchFilters;
  /** Clear all filters and results */
  clear: () => void;
  /** Whether more results are available (pagination) */
  hasMore?: boolean;
  /** Fetch next page and append to results (pagination) */
  loadMore?: () => void;
  /** Whether a loadMore is in flight (pagination) */
  isLoadingMore?: boolean;
}

// ============================================
// Internal Types (not exported from package)
// ============================================

/** Raw response from VS REST API */
export interface VsRawResponse {
  manifest: {
    column_count: number;
    columns: Array<{ name: string; type?: string }>;
  };
  result: {
    row_count: number;
    data_array: unknown[][];
  };
  next_page_token?: string | null;
  debug_info?: {
    response_time?: number;
    ann_time?: number;
    embedding_gen_time?: number;
    latency_ms?: number;
    [key: string]: unknown;
  };
}

/** Token provider interface for auth */
export interface TokenProvider {
  /** Resolves to a bearer token for the Vector Search API. */
  getToken(): Promise<string>;
}
diff --git a/packages/vector-search/src/ui/components/SearchBox.tsx b/packages/vector-search/src/ui/components/SearchBox.tsx new file mode 100644 index 00000000..dc9c83bd --- /dev/null +++ b/packages/vector-search/src/ui/components/SearchBox.tsx @@ -0,0 +1,73 @@
import * as React from 'react';

interface
SearchBoxProps { + onSearch: (query: string) => void; + value?: string; + placeholder?: string; + isLoading?: boolean; + autoFocus?: boolean; + className?: string; +} + +export function SearchBox({ + onSearch, + value, + placeholder = 'Search...', + isLoading = false, + autoFocus = false, + className, +}: SearchBoxProps) { + const [internalValue, setInternalValue] = React.useState(''); + const displayValue = value ?? internalValue; + const inputRef = React.useRef(null); + + const handleChange = (e: React.ChangeEvent) => { + const val = e.target.value; + if (value === undefined) setInternalValue(val); + onSearch(val); + }; + + const handleClear = () => { + if (value === undefined) setInternalValue(''); + onSearch(''); + inputRef.current?.focus(); + }; + + const handleKeyDown = (e: React.KeyboardEvent) => { + if (e.key === 'Escape') handleClear(); + }; + + return ( +
+ + + + + {isLoading && ( +
+ )} + {displayValue && !isLoading && ( + + )} +
+ ); +} diff --git a/packages/vector-search/src/ui/components/SearchLoadMore.tsx b/packages/vector-search/src/ui/components/SearchLoadMore.tsx new file mode 100644 index 00000000..a351c247 --- /dev/null +++ b/packages/vector-search/src/ui/components/SearchLoadMore.tsx @@ -0,0 +1,24 @@ +import * as React from 'react'; + +interface SearchLoadMoreProps { + hasMore: boolean; + isLoading: boolean; + onLoadMore: () => void; + className?: string; +} + +export function SearchLoadMore({ hasMore, isLoading, onLoadMore, className }: SearchLoadMoreProps) { + if (!hasMore) return null; + + return ( +
+ +
+ ); +} diff --git a/packages/vector-search/src/ui/components/SearchResultCard.tsx b/packages/vector-search/src/ui/components/SearchResultCard.tsx new file mode 100644 index 00000000..630e6b31 --- /dev/null +++ b/packages/vector-search/src/ui/components/SearchResultCard.tsx @@ -0,0 +1,76 @@ +import * as React from 'react'; +import type { SearchResult } from '../../plugin/types'; + +function escapeRegex(str: string): string { + return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +interface SearchResultCardProps> { + result: SearchResult; + titleColumn?: keyof T; + descriptionColumn?: keyof T; + displayColumns?: (keyof T)[]; + showScore?: boolean; + query?: string; +} + +export function SearchResultCard>({ + result, + titleColumn, + descriptionColumn, + displayColumns, + showScore = false, + query, +}: SearchResultCardProps) { + const title = titleColumn ? String(result.data[titleColumn] ?? '') : undefined; + const description = descriptionColumn ? String(result.data[descriptionColumn] ?? '') : undefined; + + const highlight = (text: string): React.ReactNode => { + if (!query) return text; + const words = query.split(/\s+/).filter(w => w.length > 0); + if (words.length === 0) return text; + const regex = new RegExp(`(${words.map(escapeRegex).join('|')})`, 'gi'); + const parts = text.split(regex); + return parts.map((part, i) => + regex.test(part) + ? {part} + : part + ); + }; + + return ( +
+
+
+ {title && ( +

+ {highlight(title)} +

+ )} + {description && ( +

+ {highlight(description)} +

+ )} + {displayColumns && ( +
+ {displayColumns + .filter(col => col !== titleColumn && col !== descriptionColumn) + .map(col => ( + + {String(col)}:{' '} + {String(result.data[col] ?? '—')} + + ))} +
+ )} +
+ {showScore && ( + + {(result.score * 100).toFixed(0)}% + + )} +
+
+ ); +} diff --git a/packages/vector-search/src/ui/components/SearchResults.tsx b/packages/vector-search/src/ui/components/SearchResults.tsx new file mode 100644 index 00000000..64d5fc9d --- /dev/null +++ b/packages/vector-search/src/ui/components/SearchResults.tsx @@ -0,0 +1,93 @@ +import * as React from 'react'; +import type { SearchResult, SearchError } from '../../plugin/types'; +import { SearchResultCard } from './SearchResultCard'; + +interface SearchResultsProps> { + results: SearchResult[]; + isLoading: boolean; + error: SearchError | null; + query: string; + totalCount: number; + queryTimeMs: number; + renderResult?: (result: SearchResult, index: number) => React.ReactNode; + displayColumns?: (keyof T)[]; + titleColumn?: keyof T; + descriptionColumn?: keyof T; + showScores?: boolean; + emptyMessage?: string; + className?: string; +} + +export function SearchResults>({ + results, + isLoading, + error, + query, + totalCount, + queryTimeMs, + renderResult, + displayColumns, + titleColumn, + descriptionColumn, + showScores = false, + emptyMessage = 'No results found.', + className, +}: SearchResultsProps) { + if (error) { + return ( +
+

Search failed

+

{error.message}

+
+ ); + } + + if (isLoading && results.length === 0) { + return ( +
+ {Array.from({ length: 3 }).map((_, i) => ( +
+
+
+
+
+ ))} +
+ ); + } + + if (!query) return null; + + if (results.length === 0) { + return ( +
+ {emptyMessage} +
+ ); + } + + return ( +
+
+ {totalCount} result{totalCount !== 1 ? 's' : ''} in {queryTimeMs}ms +
+
+ {results.map((result, index) => + renderResult + ? renderResult(result, index) + : ( + + ) + )} +
+
+ ); +} diff --git a/packages/vector-search/src/ui/hooks/useVectorSearch.ts b/packages/vector-search/src/ui/hooks/useVectorSearch.ts new file mode 100644 index 00000000..ad43d194 --- /dev/null +++ b/packages/vector-search/src/ui/hooks/useVectorSearch.ts @@ -0,0 +1,175 @@ +import { useState, useCallback, useRef, useEffect } from 'react'; +import type { + SearchResult, + SearchResponse, + SearchError, + SearchFilters, + UseVectorSearchOptions, + UseVectorSearchReturn, +} from '../../plugin/types'; + +export function useVectorSearch = Record>( + alias: string, + options: UseVectorSearchOptions = {}, +): UseVectorSearchReturn { + const { + debounceMs = 300, + numResults, + queryType, + reranker, + initialFilters = {}, + onResults, + onError, + minQueryLength = 1, + } = options; + + const [results, setResults] = useState[]>([]); + const [isLoading, setIsLoading] = useState(false); + const [error, setError] = useState(null); + const [query, setQuery] = useState(''); + const [totalCount, setTotalCount] = useState(0); + const [queryTimeMs, setQueryTimeMs] = useState(0); + const [fromCache, setFromCache] = useState(false); + const [activeFilters, setActiveFilters] = useState(initialFilters); + const [hasMore, setHasMore] = useState(false); + const [isLoadingMore, setIsLoadingMore] = useState(false); + + const nextPageTokenRef = useRef(null); + const abortRef = useRef(null); + const debounceRef = useRef | null>(null); + + const executeSearch = useCallback(async ( + searchQuery: string, + filters: SearchFilters, + isLoadMore = false, + ) => { + if (abortRef.current) abortRef.current.abort(); + abortRef.current = new AbortController(); + + if (!isLoadMore) { + setIsLoading(true); + setError(null); + } else { + setIsLoadingMore(true); + } + + try { + const url = isLoadMore + ? `/api/vector-search/${alias}/next-page` + : `/api/vector-search/${alias}/query`; + + const body: Record = isLoadMore + ? 
{ pageToken: nextPageTokenRef.current } + : { + queryText: searchQuery, + ...(Object.keys(filters).length > 0 ? { filters } : {}), + ...(numResults !== undefined ? { numResults } : {}), + ...(queryType !== undefined ? { queryType } : {}), + ...(reranker !== undefined ? { reranker } : {}), + }; + + const response = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + signal: abortRef.current.signal, + }); + + if (!response.ok) { + const err = await response.json(); + throw err as SearchError; + } + + const data: SearchResponse = await response.json(); + + if (isLoadMore) { + setResults(prev => [...prev, ...data.results]); + } else { + setResults(data.results); + } + + setTotalCount(data.totalCount); + setQueryTimeMs(data.queryTimeMs); + setFromCache(data.fromCache); + setHasMore(!!data.nextPageToken); + nextPageTokenRef.current = data.nextPageToken; + + onResults?.(data as SearchResponse); + } catch (err: unknown) { + if (err instanceof DOMException && err.name === 'AbortError') return; + const searchError = err as SearchError; + setError(searchError); + onError?.(searchError); + } finally { + setIsLoading(false); + setIsLoadingMore(false); + } + }, [alias, numResults, queryType, reranker, onResults, onError]); + + const search = useCallback((searchQuery: string) => { + setQuery(searchQuery); + + if (debounceRef.current) clearTimeout(debounceRef.current); + + if (searchQuery.length < minQueryLength) { + setResults([]); + setTotalCount(0); + setHasMore(false); + return; + } + + debounceRef.current = setTimeout(() => { + executeSearch(searchQuery, activeFilters); + }, debounceMs); + }, [debounceMs, minQueryLength, activeFilters, executeSearch]); + + const setFilters = useCallback((filters: SearchFilters) => { + setActiveFilters(filters); + if (query.length >= minQueryLength) { + executeSearch(query, filters); + } + }, [query, minQueryLength, executeSearch]); + + const loadMore = useCallback(() => { + 
if (hasMore && !isLoadingMore && nextPageTokenRef.current) { + executeSearch(query, activeFilters, true); + } + }, [hasMore, isLoadingMore, query, activeFilters, executeSearch]); + + const clear = useCallback(() => { + if (debounceRef.current) clearTimeout(debounceRef.current); + if (abortRef.current) abortRef.current.abort(); + setQuery(''); + setResults([]); + setError(null); + setTotalCount(0); + setQueryTimeMs(0); + setFromCache(false); + setHasMore(false); + nextPageTokenRef.current = null; + }, []); + + useEffect(() => { + return () => { + if (debounceRef.current) clearTimeout(debounceRef.current); + if (abortRef.current) abortRef.current.abort(); + }; + }, []); + + return { + search, + results, + isLoading, + error, + totalCount, + queryTimeMs, + fromCache, + query, + setFilters, + activeFilters, + clear, + hasMore, + loadMore, + isLoadingMore, + }; +} diff --git a/packages/vector-search/src/ui/index.ts b/packages/vector-search/src/ui/index.ts new file mode 100644 index 00000000..47797d97 --- /dev/null +++ b/packages/vector-search/src/ui/index.ts @@ -0,0 +1,6 @@ +export { useVectorSearch } from './hooks/useVectorSearch'; +export { SearchBox } from './components/SearchBox'; +export { SearchResults } from './components/SearchResults'; +export { SearchResultCard } from './components/SearchResultCard'; +export { SearchLoadMore } from './components/SearchLoadMore'; +export type { UseVectorSearchOptions, UseVectorSearchReturn } from '../plugin/types'; diff --git a/packages/vector-search/tests/integration/dogfood.test.ts b/packages/vector-search/tests/integration/dogfood.test.ts new file mode 100644 index 00000000..f0aa017e --- /dev/null +++ b/packages/vector-search/tests/integration/dogfood.test.ts @@ -0,0 +1,97 @@ +import { describe, it, expect, beforeAll } from 'vitest'; +import { VectorSearchClient } from '../../src/plugin/VectorSearchClient'; + +const DOGFOOD_HOST = 'e2-dogfood.staging.cloud.databricks.com'; +const TEST_INDEX = 
'gurary_catalog.vector-search-brickfood.retrieval_perf_cuj_index_1'; + +// Skip unless DOGFOOD_TOKEN is set +describe.skipIf(!process.env.DOGFOOD_TOKEN)('Integration: VectorSearchClient → dogfood', () => { + let client: VectorSearchClient; + + beforeAll(() => { + client = new VectorSearchClient({ + host: DOGFOOD_HOST, + tokenProvider: { + getToken: async () => process.env.DOGFOOD_TOKEN!, + }, + }); + }); + + it('returns results for a valid hybrid query', async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'aircraft instruments', + columns: ['chunk_id', 'text'], + numResults: 5, + queryType: 'hybrid', + }); + expect(response.results.length).toBeGreaterThan(0); + expect(response.results[0].score).toBeGreaterThan(0); + expect(response.results[0].data).toHaveProperty('text'); + expect(response.results[0].data).toHaveProperty('chunk_id'); + expect(response.queryTimeMs).toBeGreaterThan(0); + }, 30000); + + it('returns results for ANN query', async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'navigation systems', + columns: ['chunk_id', 'text'], + numResults: 3, + queryType: 'ann', + }); + expect(response.results.length).toBeGreaterThan(0); + expect(response.results[0].score).toBeGreaterThan(0); + }, 30000); + + it('respects numResults limit', async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'flight', + columns: ['chunk_id', 'text'], + numResults: 2, + queryType: 'hybrid', + }); + expect(response.results.length).toBeLessThanOrEqual(2); + }, 30000); + + it('returns scores between 0 and 1', async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'altitude', + columns: ['chunk_id', 'text'], + numResults: 5, + queryType: 'hybrid', + }); + response.results.forEach(r => { + expect(r.score).toBeGreaterThanOrEqual(0); + expect(r.score).toBeLessThanOrEqual(1); + }); + }, 30000); + + it('handles empty results gracefully', 
async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'xyzzy_absolutely_no_match_12345_qwerty', + columns: ['chunk_id', 'text'], + numResults: 5, + queryType: 'ann', + }); + // May still return results due to embedding similarity, but should have low scores + // If no results, that's fine too + expect(response.results).toBeDefined(); + expect(Array.isArray(response.results)).toBe(true); + }, 30000); + + it('response includes queryTimeMs from debug_info', async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'weather radar', + columns: ['chunk_id', 'text'], + numResults: 3, + queryType: 'hybrid', + }); + expect(response.queryTimeMs).toBeGreaterThan(0); + expect(response.fromCache).toBe(false); + }, 30000); +}); diff --git a/packages/vector-search/tests/plugin/VectorSearchClient.test.ts b/packages/vector-search/tests/plugin/VectorSearchClient.test.ts new file mode 100644 index 00000000..8335a4f5 --- /dev/null +++ b/packages/vector-search/tests/plugin/VectorSearchClient.test.ts @@ -0,0 +1,233 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { VectorSearchClient } from '../../src/plugin/VectorSearchClient'; + +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +const mockTokenProvider = { getToken: vi.fn().mockResolvedValue('sp-token-123') }; + +describe('VectorSearchClient', () => { + let client: VectorSearchClient; + + beforeEach(() => { + client = new VectorSearchClient({ + host: 'test-workspace.databricks.com', + tokenProvider: mockTokenProvider, + }); + mockFetch.mockReset(); + mockTokenProvider.getToken.mockClear(); + }); + + const validResponse = { + manifest: { column_count: 3, columns: [{ name: 'id' }, { name: 'title' }, { name: 'score' }] }, + result: { row_count: 2, data_array: [[1, 'ML Guide', 0.95], [2, 'AI Primer', 0.87]] }, + next_page_token: null, + debug_info: { response_time: 35 }, + }; + + describe('query()', () => { + it('constructs 
correct REST API URL and request body for hybrid search', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'machine learning', + columns: ['id', 'title'], numResults: 10, queryType: 'hybrid', + }); + const [url, opts] = mockFetch.mock.calls[0]; + expect(url).toBe('https://test-workspace.databricks.com/api/2.0/vector-search/indexes/cat.sch.idx/query'); + const body = JSON.parse(opts.body); + expect(body.query_text).toBe('machine learning'); + expect(body.query_type).toBe('HYBRID'); + expect(body.num_results).toBe(10); + expect(body.columns).toEqual(['id', 'title']); + expect(body.debug_level).toBe(1); + }); + + it('includes filters when provided', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], + numResults: 5, queryType: 'ann', filters: { category: ['books'] }, + }); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.filters).toEqual({ category: ['books'] }); + }); + + it('omits filters when empty object', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], + numResults: 5, queryType: 'ann', filters: {}, + }); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.filters).toBeUndefined(); + }); + + it('includes reranker config when boolean true', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 5, queryType: 'hybrid', reranker: true, + }); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.reranker.model).toBe('databricks_reranker'); + // Default: all 
non-id columns + expect(body.reranker.parameters.columns_to_rerank).toEqual(['title']); + }); + + it('includes custom reranker columnsToRerank', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title', 'desc'], + numResults: 5, queryType: 'hybrid', reranker: { columnsToRerank: ['desc'] }, + }); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.reranker.parameters.columns_to_rerank).toEqual(['desc']); + }); + + it('parses VS data_array response into typed SearchResult[]', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + const result = await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 10, queryType: 'hybrid', + }); + expect(result.results).toHaveLength(2); + expect(result.results[0].score).toBe(0.95); + expect(result.results[0].data).toEqual({ id: 1, title: 'ML Guide' }); + expect(result.results[1].score).toBe(0.87); + expect(result.results[1].data).toEqual({ id: 2, title: 'AI Primer' }); + expect(result.totalCount).toBe(2); + expect(result.queryTimeMs).toBe(35); + expect(result.fromCache).toBe(false); + expect(result.nextPageToken).toBeNull(); + }); + + it('handles next_page_token in response', async () => { + const responseWithToken = { ...validResponse, next_page_token: 'abc123' }; + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(responseWithToken) }); + const result = await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 10, queryType: 'hybrid', + }); + expect(result.nextPageToken).toBe('abc123'); + }); + + it('uses SP token when no userToken provided', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 
'test', columns: ['id'], + numResults: 5, queryType: 'ann', + }); + expect(mockTokenProvider.getToken).toHaveBeenCalled(); + expect(mockFetch.mock.calls[0][1].headers['Authorization']).toBe('Bearer sp-token-123'); + }); + + it('uses userToken when provided (OBO)', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], + numResults: 5, queryType: 'ann', userToken: 'user-token-456', + }); + expect(mockTokenProvider.getToken).not.toHaveBeenCalled(); + expect(mockFetch.mock.calls[0][1].headers['Authorization']).toBe('Bearer user-token-456'); + }); + + it('calls embeddingFn and sends query_vector for self-managed indexes', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + const mockEmbeddingFn = vi.fn().mockResolvedValue([0.1, 0.2, 0.3]); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 5, queryType: 'ann', embeddingFn: mockEmbeddingFn, + }); + expect(mockEmbeddingFn).toHaveBeenCalledWith('test'); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.query_vector).toEqual([0.1, 0.2, 0.3]); + expect(body.query_text).toBeUndefined(); + }); + + it('sends query_text when no embeddingFn (managed embeddings)', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], + numResults: 5, queryType: 'ann', + }); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.query_text).toBe('test'); + expect(body.query_vector).toBeUndefined(); + }); + + it('throws INVALID_QUERY when neither queryText nor queryVector provided', async () => { + await expect(client.query({ + indexName: 'x', columns: ['id'], numResults: 1, queryType: 'ann', + } as any)).rejects.toMatchObject({ 
code: 'INVALID_QUERY' }); + }); + + it('maps 401 → UNAUTHORIZED', async () => { + mockFetch.mockResolvedValue({ ok: false, status: 401 }); + await expect(client.query({ + indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', + })).rejects.toMatchObject({ code: 'UNAUTHORIZED', statusCode: 401 }); + }); + + it('maps 404 → INDEX_NOT_FOUND', async () => { + mockFetch.mockResolvedValue({ ok: false, status: 404 }); + await expect(client.query({ + indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', + })).rejects.toMatchObject({ code: 'INDEX_NOT_FOUND', statusCode: 404 }); + }); + + it('maps 429 → RATE_LIMITED and retries', async () => { + mockFetch + .mockResolvedValueOnce({ ok: false, status: 429 }) + .mockResolvedValueOnce({ ok: true, json: () => Promise.resolve(validResponse) }); + const result = await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 5, queryType: 'ann', + }); + expect(mockFetch).toHaveBeenCalledTimes(2); + expect(result.results).toHaveLength(2); + }); + + it('does not retry 400 errors', async () => { + mockFetch.mockResolvedValue({ ok: false, status: 400 }); + await expect(client.query({ + indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', + })).rejects.toMatchObject({ code: 'INVALID_QUERY' }); + expect(mockFetch).toHaveBeenCalledTimes(1); + }); + + it('retries 500 errors up to 3 times', async () => { + mockFetch.mockResolvedValue({ ok: false, status: 500 }); + await expect(client.query({ + indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', + })).rejects.toMatchObject({ code: 'INTERNAL', statusCode: 500 }); + expect(mockFetch).toHaveBeenCalledTimes(4); // 1 initial + 3 retries + }); + + it('retries network errors', async () => { + mockFetch + .mockRejectedValueOnce(new Error('ECONNRESET')) + .mockResolvedValueOnce({ ok: true, json: () => Promise.resolve(validResponse) }); + const 
result = await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 5, queryType: 'ann', + }); + expect(mockFetch).toHaveBeenCalledTimes(2); + expect(result.results).toHaveLength(2); + }); + }); + + describe('queryNextPage()', () => { + it('calls the query-next-page endpoint with page token', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.queryNextPage({ + indexName: 'cat.sch.idx', endpointName: 'my-endpoint', + pageToken: 'token123', + }); + const [url, opts] = mockFetch.mock.calls[0]; + expect(url).toBe('https://test-workspace.databricks.com/api/2.0/vector-search/indexes/cat.sch.idx/query-next-page'); + const body = JSON.parse(opts.body); + expect(body.endpoint_name).toBe('my-endpoint'); + expect(body.page_token).toBe('token123'); + }); + }); +}); diff --git a/packages/vector-search/tests/plugin/VectorSearchPlugin.test.ts b/packages/vector-search/tests/plugin/VectorSearchPlugin.test.ts new file mode 100644 index 00000000..479723fa --- /dev/null +++ b/packages/vector-search/tests/plugin/VectorSearchPlugin.test.ts @@ -0,0 +1,115 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { VectorSearchPlugin } from '../../src/plugin/VectorSearchPlugin'; + +describe('VectorSearchPlugin', () => { + beforeEach(() => { + vi.stubEnv('DATABRICKS_HOST', 'test-host.databricks.com'); + vi.stubEnv('DATABRICKS_CLIENT_ID', 'test-client'); + vi.stubEnv('DATABRICKS_CLIENT_SECRET', 'test-secret'); + }); + + describe('setup()', () => { + it('throws if DATABRICKS_HOST is not set', async () => { + vi.stubEnv('DATABRICKS_HOST', ''); + const plugin = new VectorSearchPlugin({ + indexes: { + test: { indexName: 'cat.sch.idx', columns: ['id'] }, + }, + }); + await expect(plugin.setup()).rejects.toThrow('DATABRICKS_HOST'); + }); + + it('throws if any index is missing indexName', async () => { + const plugin = new VectorSearchPlugin({ + indexes: { + test: { 
indexName: '', columns: ['id'] }, + }, + }); + await expect(plugin.setup()).rejects.toThrow('indexName'); + }); + + it('throws if any index is missing columns', async () => { + const plugin = new VectorSearchPlugin({ + indexes: { + test: { indexName: 'cat.sch.idx', columns: [] }, + }, + }); + await expect(plugin.setup()).rejects.toThrow('columns'); + }); + + it('throws if pagination enabled but no endpointName', async () => { + const plugin = new VectorSearchPlugin({ + indexes: { + test: { indexName: 'cat.sch.idx', columns: ['id'], pagination: true }, + }, + }); + await expect(plugin.setup()).rejects.toThrow('endpointName'); + }); + + it('succeeds with valid config', async () => { + const plugin = new VectorSearchPlugin({ + indexes: { + products: { + indexName: 'cat.sch.products_idx', + columns: ['id', 'name', 'description'], + queryType: 'hybrid', + numResults: 20, + }, + docs: { + indexName: 'cat.sch.docs_idx', + columns: ['id', 'title', 'content'], + reranker: true, + auth: 'on-behalf-of-user', + }, + }, + }); + await expect(plugin.setup()).resolves.not.toThrow(); + }); + }); + + describe('exports()', () => { + it('returns object with query function', async () => { + const plugin = new VectorSearchPlugin({ + indexes: { + test: { indexName: 'cat.sch.idx', columns: ['id'] }, + }, + }); + await plugin.setup(); + const exports = plugin.exports(); + expect(exports).toHaveProperty('query'); + expect(typeof exports.query).toBe('function'); + }); + }); + + describe('getResourceRequirements()', () => { + it('returns resource entry for each configured index', () => { + const plugin = new VectorSearchPlugin({ + indexes: { + products: { indexName: 'cat.sch.products', columns: ['id'] }, + docs: { indexName: 'cat.sch.docs', columns: ['id'] }, + }, + }); + const resources = plugin.getResourceRequirements(); + expect(resources).toHaveLength(2); + expect(resources[0]).toEqual({ + type: 'vector-search-index', + name: 'cat.sch.products', + permission: 'SELECT', + }); + 
expect(resources[1]).toEqual({ + type: 'vector-search-index', + name: 'cat.sch.docs', + permission: 'SELECT', + }); + }); + }); + + describe('manifest', () => { + it('has correct name and env declarations', () => { + expect(VectorSearchPlugin.manifest.name).toBe('vector-search'); + expect(VectorSearchPlugin.manifest.env).toContainEqual( + expect.objectContaining({ name: 'DATABRICKS_HOST' }) + ); + }); + }); +}); diff --git a/packages/vector-search/tests/plugin/auth.test.ts b/packages/vector-search/tests/plugin/auth.test.ts new file mode 100644 index 00000000..925b4ef4 --- /dev/null +++ b/packages/vector-search/tests/plugin/auth.test.ts @@ -0,0 +1,108 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { ServicePrincipalTokenProvider, OboTokenExtractor } from '../../src/plugin/auth'; + +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +describe('ServicePrincipalTokenProvider', () => { + let provider: ServicePrincipalTokenProvider; + + beforeEach(() => { + vi.stubEnv('DATABRICKS_CLIENT_ID', 'test-client-id'); + vi.stubEnv('DATABRICKS_CLIENT_SECRET', 'test-client-secret'); + provider = new ServicePrincipalTokenProvider('test-host.databricks.com'); + mockFetch.mockReset(); + vi.useRealTimers(); + }); + + afterEach(() => { + vi.unstubAllEnvs(); + }); + + it('fetches token from OIDC endpoint', async () => { + mockFetch.mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ access_token: 'token-abc', expires_in: 3600 }), + }); + + const token = await provider.getToken(); + + expect(token).toBe('token-abc'); + const [url, opts] = mockFetch.mock.calls[0]; + expect(url).toBe('https://test-host.databricks.com/oidc/v1/token'); + expect(opts.method).toBe('POST'); + expect(opts.headers['Content-Type']).toBe('application/x-www-form-urlencoded'); + const body = new URLSearchParams(opts.body); + expect(body.get('grant_type')).toBe('client_credentials'); + expect(body.get('client_id')).toBe('test-client-id'); + 
expect(body.get('client_secret')).toBe('test-client-secret'); + expect(body.get('scope')).toBe('all-apis'); + }); + + it('returns cached token on subsequent calls within expiry', async () => { + mockFetch.mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ access_token: 'token-abc', expires_in: 3600 }), + }); + + await provider.getToken(); + await provider.getToken(); + + expect(mockFetch).toHaveBeenCalledTimes(1); + }); + + it('refreshes token when within 2-minute expiry buffer', async () => { + vi.useFakeTimers(); + + mockFetch + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ access_token: 'token-1', expires_in: 3600 }), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ access_token: 'token-2', expires_in: 3600 }), + }); + + const token1 = await provider.getToken(); + expect(token1).toBe('token-1'); + + // Advance to within 2 minutes of expiry (3600s - 120s = 3480s) + vi.advanceTimersByTime(3481 * 1000); + + const token2 = await provider.getToken(); + expect(token2).toBe('token-2'); + expect(mockFetch).toHaveBeenCalledTimes(2); + }); +}); + +describe('OboTokenExtractor', () => { + it('extracts token from x-forwarded-access-token header', () => { + const req = { + headers: { 'x-forwarded-access-token': 'user-token-xyz' }, + } as any; + + const token = OboTokenExtractor.extractFromRequest(req); + expect(token).toBe('user-token-xyz'); + }); + + it('throws UNAUTHORIZED when header is missing', () => { + const req = { headers: {} } as any; + + expect(() => OboTokenExtractor.extractFromRequest(req)).toThrow(); + try { + OboTokenExtractor.extractFromRequest(req); + } catch (err: any) { + expect(err.code).toBe('UNAUTHORIZED'); + expect(err.statusCode).toBe(401); + } + }); + + it('throws UNAUTHORIZED when header is empty string', () => { + const req = { + headers: { 'x-forwarded-access-token': '' }, + } as any; + + expect(() => OboTokenExtractor.extractFromRequest(req)).toThrow(); + }); +}); diff --git 
a/packages/vector-search/tests/plugin/routes.test.ts b/packages/vector-search/tests/plugin/routes.test.ts
new file mode 100644
index 00000000..ef043061
--- /dev/null
+++ b/packages/vector-search/tests/plugin/routes.test.ts
@@ -0,0 +1,206 @@
+import { describe, it, expect, vi, beforeEach, beforeAll, afterAll } from 'vitest';
+import express from 'express';
+import request from 'supertest';
+import { createVectorSearchRouter } from '../../src/plugin/routes';
+import { VectorSearchPlugin } from '../../src/plugin/VectorSearchPlugin';
+
+// Mock fetch for the VectorSearchClient
+const mockFetch = vi.fn();
+vi.stubGlobal('fetch', mockFetch);
+
+/**
+ * HTTP-level tests for the router returned by createVectorSearchRouter().
+ * Requests go through supertest against an express app; every outbound
+ * fetch (token endpoint and Vector Search API) is answered by mockFetch.
+ */
+describe('Vector Search Routes', () => {
+  let app: express.Express;
+  let plugin: VectorSearchPlugin;
+
+  // Canned Vector Search payload returned for every non-token fetch.
+  const validVsResponse = {
+    manifest: { column_count: 3, columns: [{ name: 'id' }, { name: 'title' }, { name: 'score' }] },
+    result: { row_count: 2, data_array: [[1, 'ML Guide', 0.95], [2, 'AI Primer', 0.87]] },
+    next_page_token: null,
+    debug_info: { latency_ms: 35 },
+  };
+
+  beforeAll(async () => {
+    vi.stubEnv('DATABRICKS_HOST', 'test-host.databricks.com');
+    vi.stubEnv('DATABRICKS_CLIENT_ID', 'test-client');
+    vi.stubEnv('DATABRICKS_CLIENT_SECRET', 'test-secret');
+
+    // One alias per feature under test: plain, cached, paginated, on-behalf-of-user.
+    plugin = new VectorSearchPlugin({
+      indexes: {
+        products: {
+          indexName: 'cat.sch.products',
+          columns: ['id', 'title', 'description', 'category'],
+          queryType: 'hybrid',
+          numResults: 20,
+        },
+        cached: {
+          indexName: 'cat.sch.cached',
+          columns: ['id', 'text'],
+          cache: { enabled: true, ttlSeconds: 60 },
+        },
+        paginated: {
+          indexName: 'cat.sch.paginated',
+          columns: ['id', 'text'],
+          pagination: true,
+          endpointName: 'my-endpoint',
+        },
+        obo: {
+          indexName: 'cat.sch.obo',
+          columns: ['id', 'text'],
+          auth: 'on-behalf-of-user',
+        },
+      },
+    });
+    await plugin.setup();
+
+    app = express();
+    app.use(express.json());
+    app.use('/api/vector-search', createVectorSearchRouter(plugin));
+  });
+
+  afterAll(() => {
+    // Undo the env and global stubs installed by this file so they cannot
+    // affect anything that runs afterwards in the same worker.
+    vi.unstubAllEnvs();
+    vi.unstubAllGlobals();
+  });
+
+  beforeEach(() => {
+    mockFetch.mockReset();
+    // Mock the OIDC token fetch that happens on first query
+    mockFetch.mockImplementation((url: string) => {
+      if (typeof url === 'string' && url.includes('/oidc/v1/token')) {
+        return Promise.resolve({
+          ok: true,
+          json: () => Promise.resolve({ access_token: 'sp-token', expires_in: 3600 }),
+        });
+      }
+      return Promise.resolve({
+        ok: true,
+        json: () => Promise.resolve(validVsResponse),
+      });
+    });
+  });
+
+  describe('POST /:alias/query', () => {
+    it('returns results for valid query', async () => {
+      const res = await request(app)
+        .post('/api/vector-search/products/query')
+        .send({ queryText: 'machine learning' })
+        .expect(200);
+
+      expect(res.body.results).toHaveLength(2);
+      expect(res.body.results[0].score).toBe(0.95);
+      expect(res.body.results[0].data.title).toBe('ML Guide');
+      expect(res.body.totalCount).toBe(2);
+      expect(res.body.queryTimeMs).toBe(35);
+    });
+
+    it('returns 404 for unknown alias', async () => {
+      const res = await request(app)
+        .post('/api/vector-search/unknown/query')
+        .send({ queryText: 'test' })
+        .expect(404);
+
+      expect(res.body.code).toBe('INDEX_NOT_FOUND');
+    });
+
+    it('returns 400 for missing queryText and queryVector', async () => {
+      const res = await request(app)
+        .post('/api/vector-search/products/query')
+        .send({})
+        .expect(400);
+
+      expect(res.body.code).toBe('INVALID_QUERY');
+    });
+
+    it('passes filters to VS client', async () => {
+      await request(app)
+        .post('/api/vector-search/products/query')
+        .send({ queryText: 'test', filters: { category: 'books' } })
+        .expect(200);
+
+      // Verify the VS API call included filters
+      const vsCall = mockFetch.mock.calls.find(
+        (c: any[]) => typeof c[0] === 'string' && c[0].includes('/query')
+      );
+      expect(vsCall).toBeDefined();
+      const body = JSON.parse(vsCall![1].body);
+      expect(body.filters).toEqual({ category: 'books' });
+    });
+
+    it('uses OBO token when auth is on-behalf-of-user', async () => {
+      await request(app)
+        .post('/api/vector-search/obo/query')
+        .set('x-forwarded-access-token', 'user-token-123')
+        .send({ queryText: 'test' })
+        .expect(200);
+
+      const vsCall = mockFetch.mock.calls.find(
+        (c: any[]) => typeof c[0] === 'string' && c[0].includes('/query')
+      );
+      // Fail with a clear assertion (not a TypeError) if no VS call was recorded.
+      expect(vsCall).toBeDefined();
+      expect(vsCall![1].headers['Authorization']).toBe('Bearer user-token-123');
+    });
+
+    it('returns 401 when OBO index has no user token', async () => {
+      const res = await request(app)
+        .post('/api/vector-search/obo/query')
+        .send({ queryText: 'test' })
+        .expect(401);
+
+      expect(res.body.code).toBe('UNAUTHORIZED');
+    });
+  });
+
+  describe('POST /:alias/next-page', () => {
+    it('returns 400 when pagination not enabled', async () => {
+      const res = await request(app)
+        .post('/api/vector-search/products/next-page')
+        .send({ pageToken: 'abc' })
+        .expect(400);
+
+      expect(res.body.code).toBe('INVALID_QUERY');
+      expect(res.body.message).toContain('Pagination');
+    });
+
+    it('returns 400 when pageToken missing', async () => {
+      const res = await request(app)
+        .post('/api/vector-search/paginated/next-page')
+        .send({})
+        .expect(400);
+
+      expect(res.body.code).toBe('INVALID_QUERY');
+      expect(res.body.message).toContain('pageToken');
+    });
+
+    it('calls query-next-page endpoint when valid', async () => {
+      await request(app)
+        .post('/api/vector-search/paginated/next-page')
+        .send({ pageToken: 'token123' })
+        .expect(200);
+
+      const nextPageCall = mockFetch.mock.calls.find(
+        (c: any[]) => typeof c[0] === 'string' && c[0].includes('/query-next-page')
+      );
+      expect(nextPageCall).toBeDefined();
+      const body = JSON.parse(nextPageCall![1].body);
+      expect(body.page_token).toBe('token123');
+      expect(body.endpoint_name).toBe('my-endpoint');
+    });
+  });
+
+  describe('GET /:alias/config', () => {
+    it('returns public config for valid alias', async () => {
+      const res = await request(app)
+        .get('/api/vector-search/products/config')
+        .expect(200);
+
+      expect(res.body.alias).toBe('products');
+      expect(res.body.columns).toEqual(['id', 'title', 'description', 'category']);
+      expect(res.body.queryType).toBe('hybrid');
+      expect(res.body.numResults).toBe(20);
+      expect(res.body.reranker).toBe(false);
+      expect(res.body.pagination).toBe(false);
+    });
+
+    it('returns 404 for unknown alias', async () => {
+      const res = await request(app)
+        .get('/api/vector-search/unknown/config')
+        .expect(404);
+
+      expect(res.body.code).toBe('INDEX_NOT_FOUND');
+    });
+  });
+});
diff --git a/packages/vector-search/tests/ui/components/components.test.tsx b/packages/vector-search/tests/ui/components/components.test.tsx
new file mode 100644
index 00000000..681f8069
--- /dev/null
+++ b/packages/vector-search/tests/ui/components/components.test.tsx
@@ -0,0 +1,158 @@
+import { describe, it, expect, vi } from 'vitest';
+import { render, screen, fireEvent } from '@testing-library/react';
+import { SearchBox } from '../../../src/ui/components/SearchBox';
+import { SearchResultCard } from '../../../src/ui/components/SearchResultCard';
+import { SearchResults } from '../../../src/ui/components/SearchResults';
+import { SearchLoadMore } from '../../../src/ui/components/SearchLoadMore';
+
+describe('SearchBox', () => {
+  it('renders input with placeholder', () => {
+    render( {}} placeholder="Search products..."
/>);
+    expect(screen.getByPlaceholderText('Search products...')).toBeInTheDocument();
+  });
+
+  // NOTE(review): in this copy of the patch the JSX element passed to render()
+  // appears to have been stripped (e.g. `render();`, `render( {}} value="test" />)`).
+  // Restore the elements from the original commit before applying — TODO confirm.
+
+  // Changing the input value should forward the new text to onSearch.
+  it('calls onSearch on input change', () => {
+    const onSearch = vi.fn();
+    render();
+    fireEvent.change(screen.getByRole('searchbox'), { target: { value: 'test' } });
+    expect(onSearch).toHaveBeenCalledWith('test');
+  });
+
+  it('shows clear button when value present', () => {
+    render( {}} value="test" />);
+    expect(screen.getByLabelText('Clear search')).toBeInTheDocument();
+  });
+
+  it('hides clear button when value empty', () => {
+    render( {}} value="" />);
+    expect(screen.queryByLabelText('Clear search')).not.toBeInTheDocument();
+  });
+
+  // Clicking the element labelled "Clear search" should report an empty query.
+  it('calls onSearch with empty string on clear', () => {
+    const onSearch = vi.fn();
+    render();
+    fireEvent.click(screen.getByLabelText('Clear search'));
+    expect(onSearch).toHaveBeenCalledWith('');
+  });
+
+  // Pressing Escape in the search input should also report an empty query.
+  it('clears on Escape key', () => {
+    const onSearch = vi.fn();
+    render();
+    fireEvent.keyDown(screen.getByRole('searchbox'), { key: 'Escape' });
+    expect(onSearch).toHaveBeenCalledWith('');
+  });
+
+  it('shows loading spinner when isLoading', () => {
+    render( {}} isLoading />);
+    expect(screen.getByTestId('loading-spinner')).toBeInTheDocument();
+  });
+});
+
+// Rendering tests for a single result card; `result` is the shared fixture.
+describe('SearchResultCard', () => {
+  const result = {
+    score: 0.95,
+    data: { id: 1, title: 'Machine Learning Guide', description: 'A guide to ML algorithms', category: 'books' },
+  };
+
+  it('renders title and description', () => {
+    render();
+    expect(screen.getByText('Machine Learning Guide')).toBeInTheDocument();
+    expect(screen.getByText('A guide to ML algorithms')).toBeInTheDocument();
+  });
+
+  // Expects at least one <mark> element, the first wrapping the word "Machine".
+  it('highlights query words with mark tags', () => {
+    const { container } = render(
+
+    );
+    const marks = container.querySelectorAll('mark');
+    expect(marks.length).toBeGreaterThan(0);
+    expect(marks[0].textContent).toBe('Machine');
+  });
+
+  // The fixture score 0.95 is expected to be displayed as the text "95%".
+  it('shows score badge when showScore is true', () => {
+    render();
+    expect(screen.getByText('95%')).toBeInTheDocument();
+  });
+
+  it('hides score badge by default', () => {
+    render();
+    expect(screen.queryByText('95%')).not.toBeInTheDocument();
+  });
+
+  // Expects a "category:" label and the fixture's category value "books".
+  it('renders display columns as metadata', () => {
+    render(
+
+    );
+    expect(screen.getByText('category:')).toBeInTheDocument();
+    expect(screen.getByText('books')).toBeInTheDocument();
+  });
+});
+
+// Tests for the results list: loading, empty, error, populated and no-query states.
+describe('SearchResults', () => {
+  const results = [
+    { score: 0.95, data: { id: 1, title: 'Result 1' } },
+    { score: 0.87, data: { id: 2, title: 'Result 2' } },
+  ];
+
+  it('shows loading skeleton when loading with no results', () => {
+    render();
+    expect(screen.getByTestId('loading-skeleton')).toBeInTheDocument();
+  });
+
+  it('shows empty message when no results', () => {
+    render();
+    expect(screen.getByText('No results found.')).toBeInTheDocument();
+  });
+
+  it('shows custom empty message', () => {
+    render();
+    expect(screen.getByText('Nothing here')).toBeInTheDocument();
+  });
+
+  // Expects both the fixed heading "Search failed" and the error's own message.
+  it('shows error banner', () => {
+    const error = { code: 'INTERNAL' as const, message: 'Server error', statusCode: 500 };
+    render();
+    expect(screen.getByText('Search failed')).toBeInTheDocument();
+    expect(screen.getByText('Server error')).toBeInTheDocument();
+  });
+
+  it('renders results with summary', () => {
+    render();
+    expect(screen.getByText('2 results in 35ms')).toBeInTheDocument();
+    expect(screen.getByText('Result 1')).toBeInTheDocument();
+    expect(screen.getByText('Result 2')).toBeInTheDocument();
+  });
+
+  it('returns null when no query', () => {
+    const { container } = render();
+    expect(container.firstChild).toBeNull();
+  });
+});
+
+// Tests for the "Load more results" button component.
+describe('SearchLoadMore', () => {
+  it('renders button when hasMore is true', () => {
+    render( {}} />);
+    expect(screen.getByText('Load more results')).toBeInTheDocument();
+  });
+
+  it('renders nothing when hasMore is false', () => {
+    const { container } = render( {}} />);
+    expect(container.firstChild).toBeNull();
+  });
+
+  it('shows Loading...
when isLoading', () => {
+    render( {}} />);
+    expect(screen.getByText('Loading...')).toBeInTheDocument();
+  });
+
+  it('calls onLoadMore on click', () => {
+    const onLoadMore = vi.fn();
+    render();
+    fireEvent.click(screen.getByText('Load more results'));
+    expect(onLoadMore).toHaveBeenCalledTimes(1);
+  });
+});
diff --git a/packages/vector-search/tests/ui/hooks/useVectorSearch.test.ts b/packages/vector-search/tests/ui/hooks/useVectorSearch.test.ts
new file mode 100644
index 00000000..c663c75e
--- /dev/null
+++ b/packages/vector-search/tests/ui/hooks/useVectorSearch.test.ts
@@ -0,0 +1,201 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { renderHook, act } from '@testing-library/react';
+import { useVectorSearch } from '../../../src/ui/hooks/useVectorSearch';
+
+const mockFetch = vi.fn();
+vi.stubGlobal('fetch', mockFetch);
+
+const mockResponse = {
+  results: [{ score: 0.95, data: { id: 1, title: 'Test Result' } }],
+  totalCount: 1,
+  queryTimeMs: 20,
+  queryType: 'hybrid',
+  fromCache: false,
+  nextPageToken: null,
+};
+
+/** Time the fake clock is advanced by to let a debounced search fire. */
+const DEBOUNCE_MS = 300;
+
+/** Minimal view of renderHook()'s `result` that the helpers below rely on. */
+type SearchHandle = { current: { search: (query: string) => unknown } };
+
+/** Flush all pending microtasks (promise callbacks) */
+function flushPromises() {
+  return act(() => Promise.resolve());
+}
+
+/** Makes fetch resolve with an OK response whose JSON body is `mockResponse`. */
+function stubOkFetch() {
+  mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) });
+}
+
+/** Makes fetch resolve with a non-OK response carrying `payload` as its JSON body. */
+function stubFailedFetch(status: number, payload: unknown) {
+  mockFetch.mockResolvedValue({
+    ok: false,
+    status,
+    json: () => Promise.resolve(payload),
+  });
+}
+
+/** Advances the fake clock by `ms` inside act(). */
+async function advanceClock(ms: number) {
+  await act(async () => { vi.advanceTimersByTime(ms); });
+}
+
+/** Calls search(query), waits out the debounce window, then flushes promises. */
+async function searchAndSettle(hook: SearchHandle, query: string) {
+  act(() => { hook.current.search(query); });
+  await advanceClock(DEBOUNCE_MS);
+  await flushPromises();
+}
+
+describe('useVectorSearch', () => {
+  beforeEach(() => {
+    mockFetch.mockReset();
+    vi.useFakeTimers();
+  });
+
+  afterEach(() => {
+    vi.useRealTimers();
+  });
+
+  it('debounces search calls (300ms default)', async () => {
+    stubOkFetch();
+    const { result } = renderHook(() => useVectorSearch('products'));
+
+    for (const partial of ['a', 'ab', 'abc']) {
+      act(() => { result.current.search(partial); });
+    }
+
+    // Nothing may be sent while the debounce window is still open
+    expect(mockFetch).not.toHaveBeenCalled();
+
+    // Let the debounce timer fire, then let the fetch promise chain resolve
+    await advanceClock(DEBOUNCE_MS);
+    await flushPromises();
+
+    expect(mockFetch).toHaveBeenCalledTimes(1);
+    const sentBody = JSON.parse(mockFetch.mock.calls[0][1].body);
+    expect(sentBody.queryText).toBe('abc');
+  });
+
+  it('does not search below minQueryLength', async () => {
+    const { result } = renderHook(() =>
+      useVectorSearch('products', { minQueryLength: 3 })
+    );
+
+    act(() => { result.current.search('ab'); });
+    await advanceClock(400);
+
+    expect(mockFetch).not.toHaveBeenCalled();
+    expect(result.current.results).toEqual([]);
+  });
+
+  it('sets isLoading true during search', async () => {
+    // json() stays pending until the test resolves it by hand
+    let resolveJson!: (v: unknown) => void;
+    const pendingJson = new Promise((resolve) => { resolveJson = resolve; });
+    mockFetch.mockReturnValue(
+      Promise.resolve({
+        ok: true,
+        json: () => pendingJson,
+      })
+    );
+
+    const { result } = renderHook(() => useVectorSearch('products'));
+
+    // fetch has been called at this point, but json() has not resolved yet
+    await searchAndSettle(result, 'test');
+
+    expect(result.current.isLoading).toBe(true);
+
+    await act(async () => { resolveJson(mockResponse); });
+    expect(result.current.isLoading).toBe(false);
+    expect(result.current.results).toHaveLength(1);
+  });
+
+  it('populates results after successful search', async () => {
+    stubOkFetch();
+    const { result } = renderHook(() => useVectorSearch('products'));
+
+    await searchAndSettle(result, 'test');
+
+    expect(result.current.results).toHaveLength(1);
+    expect(result.current.results[0].score).toBe(0.95);
+    expect(result.current.results[0].data).toEqual({ id: 1, title: 'Test Result' });
+    expect(result.current.totalCount).toBe(1);
+    expect(result.current.queryTimeMs).toBe(20);
+    expect(result.current.fromCache).toBe(false);
+    expect(result.current.query).toBe('test');
+  });
+
+  it('sets error on failed search', async () => {
+    stubFailedFetch(404, { code: 'INDEX_NOT_FOUND', message: 'Not found', statusCode: 404 });
+
+    const { result } = renderHook(() => useVectorSearch('products'));
+
+    await searchAndSettle(result, 'test');
+
+    expect(result.current.error).not.toBeNull();
+    expect(result.current.error!.code).toBe('INDEX_NOT_FOUND');
+    expect(result.current.isLoading).toBe(false);
+  });
+
+  it('clears everything on clear()', async () => {
+    stubOkFetch();
+    const { result } = renderHook(() => useVectorSearch('products'));
+
+    await searchAndSettle(result, 'test');
+
+    expect(result.current.results).toHaveLength(1);
+
+    act(() => { result.current.clear(); });
+
+    expect(result.current.results).toEqual([]);
+    expect(result.current.query).toBe('');
+    expect(result.current.totalCount).toBe(0);
+    expect(result.current.error).toBeNull();
+  });
+
+  it('re-executes search when filters change', async () => {
+    stubOkFetch();
+    const { result } = renderHook(() => useVectorSearch('products'));
+
+    await searchAndSettle(result, 'test');
+
+    expect(mockFetch).toHaveBeenCalledTimes(1);
+
+    await act(async () => { result.current.setFilters({ category: 'books' }); });
+    await flushPromises();
+
+    expect(mockFetch).toHaveBeenCalledTimes(2);
+    const secondBody = JSON.parse(mockFetch.mock.calls[1][1].body);
+    expect(secondBody.filters).toEqual({ category: 'books' });
+  });
+
+  it('calls onResults callback on success', async () => {
+    stubOkFetch();
+    const onResults = vi.fn();
+    const { result } = renderHook(() =>
+      useVectorSearch('products', { onResults })
+    );
+
+    await searchAndSettle(result, 'test');
+
+    expect(onResults).toHaveBeenCalledTimes(1);
+    expect(onResults).toHaveBeenCalledWith(mockResponse);
+  });
+
+  it('calls onError callback on failure', async () => {
+    const errorResponse = { code: 'INTERNAL', message: 'Server error', statusCode: 500 };
+    stubFailedFetch(500, errorResponse);
+    const onError = vi.fn();
+    const { result } = renderHook(() =>
+      useVectorSearch('products', { onError })
+    );
+
+    await searchAndSettle(result, 'test');
+
+    expect(onError).toHaveBeenCalledTimes(1);
+  });
+
+  it('sends request to correct API endpoint', async () => {
+    stubOkFetch();
+    const { result } = renderHook(() => useVectorSearch('products'));
+
+    await searchAndSettle(result, 'test');
+
+    expect(mockFetch).toHaveBeenCalled();
+    expect(mockFetch.mock.calls[0][0]).toBe('/api/vector-search/products/query');
+  });
+});
diff --git a/packages/vector-search/tsconfig.json b/packages/vector-search/tsconfig.json
new file mode 100644
index 00000000..c2c6364a
--- /dev/null
+++ b/packages/vector-search/tsconfig.json
@@ -0,0 +1,16 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ESNext",
+    "moduleResolution": "bundler",
+    "jsx": "react-jsx",
+    "declaration": true,
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist", "**/*.test.ts", "**/*.test.tsx"]
+}
diff --git
a/packages/vector-search/vitest.config.ts b/packages/vector-search/vitest.config.ts
new file mode 100644
index 00000000..6ccbfd4c
--- /dev/null
+++ b/packages/vector-search/vitest.config.ts
@@ -0,0 +1,9 @@
+import { defineConfig } from 'vitest/config';
+
+// Vitest configuration for the vector-search package.
+export default defineConfig({
+  test: {
+    // Run tests in the jsdom environment (the UI tests in this package render React components).
+    environment: 'jsdom',
+    // Enable Vitest's global test APIs; the test files in this patch still import them explicitly.
+    globals: true,
+    // Loaded before the tests; ./vitest.setup.ts imports '@testing-library/jest-dom/vitest'.
+    setupFiles: ['./vitest.setup.ts'],
+  },
+});
diff --git a/packages/vector-search/vitest.setup.ts b/packages/vector-search/vitest.setup.ts
new file mode 100644
index 00000000..bb02c60c
--- /dev/null
+++ b/packages/vector-search/vitest.setup.ts
@@ -0,0 +1 @@
+import '@testing-library/jest-dom/vitest';