From b172e4c992c010a3367d83ec3e9d7d6120242b83 Mon Sep 17 00:00:00 2001 From: Adam Gurary Date: Fri, 20 Mar 2026 13:06:00 -0400 Subject: [PATCH] Add Vector Search plugin for AppKit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds @databricks/appkit-vector-search — a plugin that gives Databricks Apps built with AppKit instant vector search query capabilities. Ships backend (Express routes, VS REST API client, auth) and frontend (React hook, styled components with Radix UI). Developer experience target: ~45 lines for a full search page with search box, results, filters, and keyword highlighting. 82 tests included. Validated against real VS index on dogfood. --- packages/vector-search/README.md | 377 ++++++++++++++++++ packages/vector-search/package.json | 47 +++ packages/vector-search/src/index.ts | 13 + .../src/plugin/VectorSearchClient.ts | 200 ++++++++++ .../src/plugin/VectorSearchPlugin.ts | 105 +++++ packages/vector-search/src/plugin/auth.ts | 48 +++ packages/vector-search/src/plugin/routes.ts | 136 +++++++ packages/vector-search/src/plugin/types.ts | 198 +++++++++ .../src/ui/components/SearchBox.tsx | 73 ++++ .../src/ui/components/SearchLoadMore.tsx | 24 ++ .../src/ui/components/SearchResultCard.tsx | 76 ++++ .../src/ui/components/SearchResults.tsx | 93 +++++ .../src/ui/hooks/useVectorSearch.ts | 175 ++++++++ packages/vector-search/src/ui/index.ts | 6 + .../tests/integration/dogfood.test.ts | 97 +++++ .../tests/plugin/VectorSearchClient.test.ts | 233 +++++++++++ .../tests/plugin/VectorSearchPlugin.test.ts | 115 ++++++ .../vector-search/tests/plugin/auth.test.ts | 108 +++++ .../vector-search/tests/plugin/routes.test.ts | 206 ++++++++++ .../tests/ui/components/components.test.tsx | 158 ++++++++ .../tests/ui/hooks/useVectorSearch.test.ts | 201 ++++++++++ packages/vector-search/tsconfig.json | 16 + packages/vector-search/vitest.config.ts | 9 + packages/vector-search/vitest.setup.ts | 1 + 24 files changed, 2715 
insertions(+) create mode 100644 packages/vector-search/README.md create mode 100644 packages/vector-search/package.json create mode 100644 packages/vector-search/src/index.ts create mode 100644 packages/vector-search/src/plugin/VectorSearchClient.ts create mode 100644 packages/vector-search/src/plugin/VectorSearchPlugin.ts create mode 100644 packages/vector-search/src/plugin/auth.ts create mode 100644 packages/vector-search/src/plugin/routes.ts create mode 100644 packages/vector-search/src/plugin/types.ts create mode 100644 packages/vector-search/src/ui/components/SearchBox.tsx create mode 100644 packages/vector-search/src/ui/components/SearchLoadMore.tsx create mode 100644 packages/vector-search/src/ui/components/SearchResultCard.tsx create mode 100644 packages/vector-search/src/ui/components/SearchResults.tsx create mode 100644 packages/vector-search/src/ui/hooks/useVectorSearch.ts create mode 100644 packages/vector-search/src/ui/index.ts create mode 100644 packages/vector-search/tests/integration/dogfood.test.ts create mode 100644 packages/vector-search/tests/plugin/VectorSearchClient.test.ts create mode 100644 packages/vector-search/tests/plugin/VectorSearchPlugin.test.ts create mode 100644 packages/vector-search/tests/plugin/auth.test.ts create mode 100644 packages/vector-search/tests/plugin/routes.test.ts create mode 100644 packages/vector-search/tests/ui/components/components.test.tsx create mode 100644 packages/vector-search/tests/ui/hooks/useVectorSearch.test.ts create mode 100644 packages/vector-search/tsconfig.json create mode 100644 packages/vector-search/vitest.config.ts create mode 100644 packages/vector-search/vitest.setup.ts diff --git a/packages/vector-search/README.md b/packages/vector-search/README.md new file mode 100644 index 00000000..b831fc00 --- /dev/null +++ b/packages/vector-search/README.md @@ -0,0 +1,377 @@ +# @databricks/appkit-vector-search + +Appkit plugin that adds Databricks Vector Search to your app — backend routes, React hook, 
and UI components in one package. + +## Quick Start + +**Backend** (`app.ts`): + +```typescript +import { createApp } from '@databricks/appkit'; +import { VectorSearchPlugin } from '@databricks/appkit-vector-search'; + +createApp({ + plugins: [ + new VectorSearchPlugin({ + indexes: { + products: { + indexName: 'catalog.schema.product_index', + columns: ['id', 'name', 'description', 'price', 'category'], + }, + }, + }), + ], +}); +``` + +**Frontend** (`ProductSearch.tsx`): + +```tsx +import { useVectorSearch, SearchBox, SearchResults } from '@databricks/appkit-vector-search/ui'; + +function ProductSearch() { + const vs = useVectorSearch<{ id: string; name: string; description: string; price: number; category: string }>('products'); + + return ( +
+ + +
+ ); +} +``` + +That's it — hybrid search with debouncing, loading states, keyword highlighting, and error handling. + +## Installation + +```bash +npm install @databricks/appkit-vector-search +``` + +Peer dependencies: `react ^18.x`, `@databricks/appkit ^0.x`. + +## Backend Setup + +Register the plugin with `createApp`. Each key in `indexes` is an **alias** used by the frontend hook and API routes. + +```typescript +new VectorSearchPlugin({ + indexes: { + products: { + indexName: 'catalog.schema.product_index', // required — three-level UC name + columns: ['id', 'name', 'description'], // required — columns to return + queryType: 'hybrid', // 'ann' | 'hybrid' | 'full_text' (default: 'hybrid') + numResults: 20, // max results per query (default: 20) + reranker: false, // enable Databricks reranker (default: false) + auth: 'service-principal', // 'service-principal' | 'on-behalf-of-user' (default: 'service-principal') + cache: { enabled: false }, // see Caching section + pagination: false, // see Pagination section + endpointName: 'my-endpoint', // required when pagination: true + embeddingFn: undefined, // see Self-Managed Embeddings section + }, + }, +}) +``` + +### IndexConfig Reference + +| Property | Type | Default | Description | +|----------|------|---------|-------------| +| `indexName` | `string` | *required* | Three-level UC name (`catalog.schema.index`) | +| `columns` | `string[]` | *required* | Columns to return in results | +| `queryType` | `'ann' \| 'hybrid' \| 'full_text'` | `'hybrid'` | Default search mode | +| `numResults` | `number` | `20` | Max results per query | +| `reranker` | `boolean \| { columnsToRerank: string[] }` | `false` | Enable built-in reranker | +| `auth` | `'service-principal' \| 'on-behalf-of-user'` | `'service-principal'` | Auth mode | +| `cache` | `CacheConfig` | `undefined` | Optional result caching | +| `pagination` | `boolean` | `false` | Enable cursor pagination | +| `endpointName` | `string` | `undefined` | VS endpoint name 
(required if `pagination: true`) | +| `embeddingFn` | `(text: string) => Promise<number[]>` | `undefined` | Custom embedding function for self-managed indexes | + +## Frontend + +### `useVectorSearch` Hook + +```typescript +const vs = useVectorSearch('products', { + debounceMs: 300, // debounce delay (default: 300) + numResults: 10, // override server default + queryType: 'ann', // override server default + reranker: true, // override server default + minQueryLength: 2, // minimum chars before searching (default: 1) + initialFilters: { category: 'electronics' }, + onResults: (response) => console.log(response), + onError: (error) => console.error(error), +}); +``` + +**Returns:** + +| Property | Type | Description | +|----------|------|-------------| +| `search` | `(query: string) => void` | Execute a search (debounced) | +| `results` | `SearchResult[]` | Current results (each has `.score` and `.data`) | +| `isLoading` | `boolean` | Whether a search is in flight | +| `error` | `SearchError \| null` | Error from last search | +| `query` | `string` | Current query text | +| `totalCount` | `number` | Total result count | +| `queryTimeMs` | `number` | Query execution time in ms | +| `fromCache` | `boolean` | Whether results came from cache | +| `setFilters` | `(filters) => void` | Set filters and re-execute search | +| `activeFilters` | `SearchFilters` | Current active filters | +| `clear` | `() => void` | Clear query, results, and filters | +| `hasMore` | `boolean` | More results available (pagination) | +| `loadMore` | `() => void` | Fetch next page, append to results | +| `isLoadingMore` | `boolean` | Whether loadMore is in flight | + +The hook handles debouncing, request cancellation (AbortController), filter reactivity, and cleanup on unmount. + +### Components + +#### `<SearchBox>` + +```tsx + +``` + +Includes search icon, clear button (appears when input has value), Escape key to clear, and loading spinner. 
+ +#### `<SearchResults>` + +```tsx + ...} // fully custom result rendering (overrides default card) + className="mt-4" +/> +``` + +States: loading skeleton (3 animated cards), error banner, empty message, results with count + timing. + +#### `<SearchResultCard>` + +Used internally by `SearchResults`, but can be used standalone: + +```tsx + +``` + +#### `<SearchLoadMore>` + +```tsx + +``` + +### Filters + +Use `setFilters` from the hook to apply VS filter syntax: + +```typescript +// IN list +vs.setFilters({ category: ['electronics', 'books'] }); + +// Comparison operators +vs.setFilters({ 'price >=': 10, 'price <=': 100 }); + +// NOT +vs.setFilters({ 'title NOT': 'test' }); + +// LIKE +vs.setFilters({ 'name LIKE': 'data%' }); + +// OR across columns +vs.setFilters({ 'color1 OR color2': ['red', 'blue'] }); +``` + +Calling `setFilters` immediately re-executes the current search with the new filters. + +## Auth + +### Service Principal (default) + +The plugin uses `DATABRICKS_CLIENT_ID` and `DATABRICKS_CLIENT_SECRET` from the environment. When deployed to Databricks Apps, these are set automatically. OAuth tokens are cached and refreshed with a 2-minute buffer before expiry. + +No configuration needed — this is the default. + +### On-Behalf-of-User + +For indexes with row-level security or Unity Catalog permissions: + +```typescript +indexes: { + docs: { + indexName: 'catalog.schema.docs_index', + columns: ['id', 'title', 'content'], + auth: 'on-behalf-of-user', // uses the logged-in user's token + }, +} +``` + +The plugin extracts the user's OAuth token from the `x-forwarded-access-token` header (set by Databricks Apps proxy). Queries run with the user's identity and UC permissions. 
+ +## Self-Managed Embeddings + +For indexes that don't use Databricks-managed embeddings, provide an `embeddingFn` that converts query text to a vector: + +```typescript +indexes: { + custom: { + indexName: 'catalog.schema.custom_index', + columns: ['id', 'title', 'content'], + queryType: 'ann', + embeddingFn: async (text) => { + const resp = await fetch( + `https://${process.env.DATABRICKS_HOST}/serving-endpoints/my-embedding-model/invocations`, + { + method: 'POST', + headers: { 'Authorization': `Bearer ${token}`, 'Content-Type': 'application/json' }, + body: JSON.stringify({ input: [text] }), + }, + ); + const data = await resp.json(); + return data.data[0].embedding; + }, + }, +} +``` + +In this example, `token` is a Databricks access token that your app must obtain itself; the plugin calls `embeddingFn` with the query text only. + +When `embeddingFn` is set, the plugin calls it to convert `queryText` into `queryVector` before sending to VS. The frontend hook works identically — users type text, the backend handles the conversion. + +If omitted, the plugin sends `queryText` directly and VS computes embeddings server-side (managed mode). + +## Caching + +Optional LRU cache for search results. Off by default (freeform search has low cache hit rates). + +```typescript +indexes: { + products: { + indexName: 'catalog.schema.product_index', + columns: ['id', 'name', 'description'], + cache: { + enabled: true, + ttlSeconds: 120, // time-to-live per entry (default: 60) + maxEntries: 1000, // max cached queries (default: 1000) + }, + }, +} +``` + +Cached responses include `fromCache: true` in the response. The hook exposes this via `vs.fromCache`. + +Note: in the plugin code added by this change, the query route and `VectorSearchClient` do not read the `cache` option, and responses are always returned with `fromCache: false`. + +## Pagination + +Cursor-based pagination for large result sets. Off by default — VS typically returns results in 20-40ms, so most apps don't need it. 
+ +```typescript +indexes: { + products: { + indexName: 'catalog.schema.product_index', + columns: ['id', 'name', 'description'], + pagination: true, + endpointName: 'my-vs-endpoint', // required when pagination is enabled + }, +} +``` + +Frontend usage: + +```tsx +const vs = useVectorSearch('products'); + +return ( + <> + + + + +); +``` + +`loadMore` fetches the next page and appends results to the existing array. + +## API Reference + +The plugin registers these Express routes automatically: + +| Method | Path | Body | Description | +|--------|------|------|-------------| +| `POST` | `/api/vector-search/:alias/query` | `SearchRequest` | Execute a search | +| `POST` | `/api/vector-search/:alias/next-page` | `{ pageToken: string }` | Fetch next page (requires `pagination: true`) | +| `GET` | `/api/vector-search/:alias/config` | — | Returns index config (columns, queryType, numResults, etc.) | + +### SearchRequest Body + +```json +{ + "queryText": "wireless headphones", + "filters": { "category": ["electronics"] }, + "numResults": 10, + "queryType": "hybrid", + "reranker": true +} +``` + +### SearchResponse + +```json +{ + "results": [ + { "score": 0.92, "data": { "id": "1", "name": "...", "description": "..." } } + ], + "totalCount": 47, + "queryTimeMs": 35, + "queryType": "hybrid", + "fromCache": false, + "nextPageToken": null +} +``` + +### Error Response + +```json +{ + "code": "INVALID_QUERY", + "message": "queryText or queryVector is required", + "statusCode": 400 +} +``` + +Error codes: `UNAUTHORIZED`, `INDEX_NOT_FOUND`, `INVALID_QUERY`, `RATE_LIMITED`, `INTERNAL`. 
diff --git a/packages/vector-search/package.json b/packages/vector-search/package.json new file mode 100644 index 00000000..efdeb804 --- /dev/null +++ b/packages/vector-search/package.json @@ -0,0 +1,47 @@ +{ + "name": "@databricks/appkit-vector-search", + "version": "0.1.0", + "description": "Vector Search plugin for Databricks Appkit", + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "import": "./dist/index.js", + "types": "./dist/index.d.ts" + }, + "./ui": { + "import": "./dist/ui/index.js", + "types": "./dist/ui/index.d.ts" + } + }, + "scripts": { + "build": "tsc", + "dev": "tsc --watch", + "test": "vitest run", + "test:watch": "vitest" + }, + "peerDependencies": { + "@databricks/appkit": "^0.1.0", + "react": "^18.0.0" + }, + "dependencies": { + "@radix-ui/react-icons": "^1.3.0", + "@radix-ui/react-select": "^2.1.0", + "@radix-ui/react-slider": "^1.2.0", + "express": "^5.2.1" + }, + "devDependencies": { + "@testing-library/jest-dom": "^6.4.0", + "@testing-library/react": "^15.0.0", + "@types/express": "^4.17.0", + "@types/react": "^18.3.0", + "@types/react-dom": "^19.2.3", + "@types/supertest": "^7.2.0", + "jsdom": "^24.0.0", + "react": "^19.2.4", + "react-dom": "^19.2.4", + "supertest": "^7.2.2", + "typescript": "^5.4.0", + "vitest": "^1.6.0" + } +} diff --git a/packages/vector-search/src/index.ts b/packages/vector-search/src/index.ts new file mode 100644 index 00000000..dbe386e9 --- /dev/null +++ b/packages/vector-search/src/index.ts @@ -0,0 +1,13 @@ +export { VectorSearchPlugin } from './plugin/VectorSearchPlugin'; +export { createVectorSearchRouter } from './plugin/routes'; +export type { + VectorSearchPluginConfig, + IndexConfig, + RerankerConfig, + CacheConfig, + SearchRequest, + SearchResponse, + SearchResult, + SearchFilters, + SearchError, +} from './plugin/types'; diff --git a/packages/vector-search/src/plugin/VectorSearchClient.ts b/packages/vector-search/src/plugin/VectorSearchClient.ts new file mode 100644 index 
00000000..d80ceb86 --- /dev/null +++ b/packages/vector-search/src/plugin/VectorSearchClient.ts @@ -0,0 +1,200 @@ +import type { SearchResponse, SearchFilters, SearchError, RerankerConfig, TokenProvider, VsRawResponse } from './types'; + +export class VectorSearchClient { + private host: string; + private tokenProvider: TokenProvider; + + constructor(config: { host: string; tokenProvider: TokenProvider }) { + this.host = config.host; + this.tokenProvider = config.tokenProvider; + } + + async query(params: { + indexName: string; + queryText?: string; + queryVector?: number[]; + columns: string[]; + numResults: number; + queryType: 'ann' | 'hybrid' | 'full_text'; + filters?: SearchFilters; + reranker?: boolean | RerankerConfig; + userToken?: string; + embeddingFn?: (text: string) => Promise; + }): Promise { + const token = params.userToken ?? await this.tokenProvider.getToken(); + + // Resolve query: managed (query_text) vs self-managed (query_vector) + let queryText = params.queryText; + let queryVector = params.queryVector; + + if (params.embeddingFn && queryText && !queryVector) { + queryVector = await params.embeddingFn(queryText); + queryText = undefined; + } + + if (!queryText && !queryVector) { + throw { + code: 'INVALID_QUERY' as const, + message: 'Either queryText or queryVector is required', + statusCode: 400, + }; + } + + const body: Record = { + columns: params.columns, + num_results: params.numResults, + query_type: params.queryType.toUpperCase(), + debug_level: 1, + }; + + if (queryText) body.query_text = queryText; + if (queryVector) body.query_vector = queryVector; + + if (params.filters && Object.keys(params.filters).length > 0) { + body.filters = params.filters; + } + + if (params.reranker) { + const columnsToRerank = typeof params.reranker === 'object' + ? 
params.reranker.columnsToRerank + : params.columns.filter(c => c !== 'id'); + body.reranker = { + model: 'databricks_reranker', + parameters: { columns_to_rerank: columnsToRerank }, + }; + } + + const response = await this.fetchWithRetry( + `https://${this.host}/api/2.0/vector-search/indexes/${params.indexName}/query`, + { + method: 'POST', + headers: { + 'Authorization': `Bearer ${token}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(body), + }, + ); + + if (!response.ok) throw this.mapError(response); + const raw = await response.json() as VsRawResponse; + return this.parseResponse(raw, params.queryType); + } + + async queryNextPage(params: { + indexName: string; + endpointName: string; + pageToken: string; + userToken?: string; + }): Promise { + const token = params.userToken ?? await this.tokenProvider.getToken(); + + const response = await this.fetchWithRetry( + `https://${this.host}/api/2.0/vector-search/indexes/${params.indexName}/query-next-page`, + { + method: 'POST', + headers: { + 'Authorization': `Bearer ${token}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + endpoint_name: params.endpointName, + page_token: params.pageToken, + }), + }, + ); + + if (!response.ok) throw this.mapError(response); + const raw = await response.json() as VsRawResponse; + return this.parseResponse(raw, 'hybrid'); + } + + private parseResponse(raw: VsRawResponse, queryType: 'ann' | 'hybrid' | 'full_text'): SearchResponse { + const columnNames = raw.manifest.columns.map(c => c.name); + const scoreIndex = columnNames.indexOf('score'); + + const results = raw.result.data_array.map(row => { + const data: Record = {}; + for (let i = 0; i < columnNames.length; i++) { + if (columnNames[i] !== 'score') data[columnNames[i]] = row[i]; + } + return { + score: scoreIndex >= 0 ? (row[scoreIndex] as number) : 0, + data, + }; + }); + + return { + results, + totalCount: raw.result.row_count, + queryTimeMs: raw.debug_info?.response_time ?? 
raw.debug_info?.latency_ms ?? 0, + queryType, + fromCache: false, + nextPageToken: raw.next_page_token ?? null, + }; + } + + private mapError(response: { status: number }): SearchError { + const codeMap: Record = { + 401: 'UNAUTHORIZED', + 403: 'UNAUTHORIZED', + 404: 'INDEX_NOT_FOUND', + 400: 'INVALID_QUERY', + 429: 'RATE_LIMITED', + }; + return { + code: codeMap[response.status] ?? 'INTERNAL', + message: `VS query failed with status ${response.status}`, + statusCode: response.status, + }; + } + + private async fetchWithRetry( + url: string, + options: RequestInit, + maxRetries = 3, + backoffMs = 1, + ): Promise { + let lastError: Error | null = null; + + for (let attempt = 0; attempt <= maxRetries; attempt++) { + try { + const response = await fetch(url, options); + + // Don't retry client errors (4xx except 429) + if (response.status >= 400 && response.status < 500 && response.status !== 429) { + return response; + } + + if (response.ok) { + return response; + } + + // Retry 429 and 5xx + lastError = new Error(`HTTP ${response.status}`); + if (attempt < maxRetries) { + await new Promise(r => setTimeout(r, backoffMs)); + continue; + } + return response; + } catch (err) { + lastError = err as Error; + if (attempt < maxRetries) { + await new Promise(r => setTimeout(r, backoffMs)); + continue; + } + throw { + code: 'INTERNAL' as const, + message: `Network error: ${lastError.message}`, + statusCode: 500, + }; + } + } + + throw { + code: 'INTERNAL' as const, + message: 'Failed after retries', + statusCode: 500, + }; + } +} diff --git a/packages/vector-search/src/plugin/VectorSearchPlugin.ts b/packages/vector-search/src/plugin/VectorSearchPlugin.ts new file mode 100644 index 00000000..97d4de92 --- /dev/null +++ b/packages/vector-search/src/plugin/VectorSearchPlugin.ts @@ -0,0 +1,105 @@ +import type { VectorSearchPluginConfig, IndexConfig, SearchRequest, SearchResponse } from './types'; +import { VectorSearchClient } from './VectorSearchClient'; +import { 
ServicePrincipalTokenProvider, OboTokenExtractor } from './auth'; + +export class VectorSearchPlugin { + static manifest = { + name: 'vector-search', + description: 'Query Databricks Vector Search indexes from your app', + resources: { required: [] as any[], optional: [] as any[] }, + env: [ + { name: 'DATABRICKS_HOST', description: 'Databricks workspace hostname', source: 'auto' }, + { name: 'DATABRICKS_CLIENT_ID', description: 'Service principal client ID', source: 'auto' }, + { name: 'DATABRICKS_CLIENT_SECRET', description: 'Service principal client secret', source: 'auto' }, + ], + }; + + private config: VectorSearchPluginConfig; + private client!: VectorSearchClient; + private spTokenProvider!: ServicePrincipalTokenProvider; + + constructor(config: VectorSearchPluginConfig) { + this.config = config; + } + + async setup(): Promise { + const host = process.env.DATABRICKS_HOST; + if (!host) { + throw new Error( + 'DATABRICKS_HOST is not set. Ensure the app is deployed to Databricks Apps or set the environment variable manually.', + ); + } + + // Fail-fast config validation + for (const [alias, idx] of Object.entries(this.config.indexes)) { + if (!idx.indexName) { + throw new Error(`Index "${alias}" is missing required field "indexName"`); + } + if (!idx.columns || idx.columns.length === 0) { + throw new Error(`Index "${alias}" is missing required field "columns"`); + } + if (idx.pagination && !idx.endpointName) { + throw new Error(`Index "${alias}" has pagination enabled but is missing "endpointName"`); + } + } + + this.spTokenProvider = new ServicePrincipalTokenProvider(host); + this.client = new VectorSearchClient({ host, tokenProvider: this.spTokenProvider }); + } + + async shutdown(): Promise { + // No cleanup needed currently + } + + getResourceRequirements() { + return Object.values(this.config.indexes).map((idx) => ({ + type: 'vector-search-index' as const, + name: idx.indexName, + permission: 'SELECT' as const, + })); + } + + exports() { + return { + 
query: (alias: string, request: SearchRequest) => this.executeQuery(alias, request), + }; + } + + /** Resolve an index alias to its config. Throws if not found. */ + resolveIndex(alias: string): IndexConfig { + const config = this.config.indexes[alias]; + if (!config) { + throw { + code: 'INDEX_NOT_FOUND' as const, + message: `No index configured with alias "${alias}"`, + statusCode: 404, + }; + } + return config; + } + + /** Get the VS client instance (used by route handlers) */ + getClient(): VectorSearchClient { + return this.client; + } + + /** Get the full plugin config (used by route handlers) */ + getConfig(): VectorSearchPluginConfig { + return this.config; + } + + private async executeQuery(alias: string, request: SearchRequest): Promise { + const indexConfig = this.resolveIndex(alias); + return this.client.query({ + indexName: indexConfig.indexName, + queryText: request.queryText, + queryVector: request.queryVector, + columns: request.columns ?? indexConfig.columns, + numResults: request.numResults ?? indexConfig.numResults ?? 20, + queryType: request.queryType ?? indexConfig.queryType ?? 'hybrid', + filters: request.filters, + reranker: request.reranker ?? indexConfig.reranker ?? 
false, + embeddingFn: indexConfig.embeddingFn, + }); + } +} diff --git a/packages/vector-search/src/plugin/auth.ts b/packages/vector-search/src/plugin/auth.ts new file mode 100644 index 00000000..62211a8e --- /dev/null +++ b/packages/vector-search/src/plugin/auth.ts @@ -0,0 +1,48 @@ +import type { TokenProvider, SearchError } from './types'; + +export class ServicePrincipalTokenProvider implements TokenProvider { + private token: string | null = null; + private expiresAt = 0; + private host: string; + + constructor(host: string) { + this.host = host; + } + + async getToken(): Promise { + if (this.token && Date.now() < this.expiresAt - 120_000) { + return this.token; + } + + const response = await fetch(`https://${this.host}/oidc/v1/token`, { + method: 'POST', + headers: { 'Content-Type': 'application/x-www-form-urlencoded' }, + body: new URLSearchParams({ + grant_type: 'client_credentials', + client_id: process.env.DATABRICKS_CLIENT_ID!, + client_secret: process.env.DATABRICKS_CLIENT_SECRET!, + scope: 'all-apis', + }).toString(), + }); + + const data = await response.json(); + this.token = data.access_token; + this.expiresAt = Date.now() + data.expires_in * 1000; + return this.token!; + } +} + +export class OboTokenExtractor { + static extractFromRequest(req: { headers: Record }): string { + const token = req.headers['x-forwarded-access-token']; + if (!token) { + const error: SearchError = { + code: 'UNAUTHORIZED', + message: 'No user token found. 
Ensure app is configured for user authorization.', + statusCode: 401, + }; + throw error; + } + return token; + } +} diff --git a/packages/vector-search/src/plugin/routes.ts b/packages/vector-search/src/plugin/routes.ts new file mode 100644 index 00000000..cd58e354 --- /dev/null +++ b/packages/vector-search/src/plugin/routes.ts @@ -0,0 +1,136 @@ +import { Router } from 'express'; +import type { Request, Response } from 'express'; +import type { VectorSearchPlugin } from './VectorSearchPlugin'; +import { OboTokenExtractor } from './auth'; +import type { SearchRequest } from './types'; + +export function createVectorSearchRouter(plugin: VectorSearchPlugin): Router { + const router = Router(); + + // POST /:alias/query + router.post('/:alias/query', async (req: Request, res: Response) => { + const { alias } = req.params; + + let indexConfig; + try { + indexConfig = plugin.resolveIndex(alias); + } catch (err: any) { + return res.status(err.statusCode ?? 404).json(err); + } + + const body: SearchRequest = req.body; + + if (!body.queryText && !body.queryVector) { + return res.status(400).json({ + code: 'INVALID_QUERY', + message: 'queryText or queryVector is required', + statusCode: 400, + }); + } + + // Resolve auth + let userToken: string | undefined; + if (indexConfig.auth === 'on-behalf-of-user') { + try { + userToken = OboTokenExtractor.extractFromRequest(req); + } catch (err: any) { + return res.status(401).json(err); + } + } + + try { + const client = plugin.getClient(); + const response = await client.query({ + indexName: indexConfig.indexName, + queryText: body.queryText, + queryVector: body.queryVector, + columns: body.columns ?? indexConfig.columns, + numResults: body.numResults ?? indexConfig.numResults ?? 20, + queryType: body.queryType ?? indexConfig.queryType ?? 'hybrid', + filters: body.filters, + reranker: body.reranker ?? indexConfig.reranker ?? 
false, + userToken, + embeddingFn: indexConfig.embeddingFn, + }); + + return res.json(response); + } catch (err: any) { + return res.status(err.statusCode ?? 500).json(err); + } + }); + + // POST /:alias/next-page + router.post('/:alias/next-page', async (req: Request, res: Response) => { + const { alias } = req.params; + + let indexConfig; + try { + indexConfig = plugin.resolveIndex(alias); + } catch (err: any) { + return res.status(err.statusCode ?? 404).json(err); + } + + if (!indexConfig.pagination) { + return res.status(400).json({ + code: 'INVALID_QUERY', + message: `Pagination is not enabled for index "${alias}"`, + statusCode: 400, + }); + } + + const { pageToken } = req.body; + if (!pageToken) { + return res.status(400).json({ + code: 'INVALID_QUERY', + message: 'pageToken is required', + statusCode: 400, + }); + } + + let userToken: string | undefined; + if (indexConfig.auth === 'on-behalf-of-user') { + try { + userToken = OboTokenExtractor.extractFromRequest(req); + } catch (err: any) { + return res.status(401).json(err); + } + } + + try { + const client = plugin.getClient(); + const response = await client.queryNextPage({ + indexName: indexConfig.indexName, + endpointName: indexConfig.endpointName!, + pageToken, + userToken, + }); + + return res.json(response); + } catch (err: any) { + return res.status(err.statusCode ?? 500).json(err); + } + }); + + // GET /:alias/config + router.get('/:alias/config', (req: Request, res: Response) => { + const { alias } = req.params; + + let indexConfig; + try { + indexConfig = plugin.resolveIndex(alias); + } catch (err: any) { + return res.status(err.statusCode ?? 404).json(err); + } + + return res.json({ + alias, + columns: indexConfig.columns, + queryType: indexConfig.queryType ?? 'hybrid', + numResults: indexConfig.numResults ?? 
20, + reranker: !!indexConfig.reranker, + pagination: !!indexConfig.pagination, + }); + }); + + return router; +} diff --git a/packages/vector-search/src/plugin/types.ts b/packages/vector-search/src/plugin/types.ts new file mode 100644 index 00000000..f8ad1e84 --- /dev/null +++ b/packages/vector-search/src/plugin/types.ts @@ -0,0 +1,198 @@ +// ============================================ +// Plugin Configuration Types +// ============================================ + +export interface VectorSearchPluginConfig { + indexes: Record; +} + +export interface IndexConfig { + /** Three-level UC name: catalog.schema.index_name */ + indexName: string; + /** Columns to return in results */ + columns: string[]; + /** Default search mode */ + queryType?: 'ann' | 'hybrid' | 'full_text'; // default: 'hybrid' + /** Max results per query */ + numResults?: number; // default: 20 + /** Enable built-in reranker */ + reranker?: boolean | RerankerConfig; // default: false + /** Auth mode */ + auth?: 'service-principal' | 'on-behalf-of-user'; // default: 'service-principal' + /** Result caching */ + cache?: CacheConfig; + /** Enable cursor pagination */ + pagination?: boolean; // default: false + /** VS endpoint name (required if pagination: true) */ + endpointName?: string; + /** + * For self-managed embedding indexes: converts query text to embedding vector. + * If provided, the plugin calls this function and sends query_vector to VS. + * If omitted, the plugin sends query_text and VS computes embeddings (managed mode). + */ + embeddingFn?: (text: string) => Promise; +} + +export interface RerankerConfig { + columnsToRerank: string[]; +} + +export interface CacheConfig { + enabled: boolean; + ttlSeconds?: number; // default: 60 + maxEntries?: number; // default: 1000 +} + +// ============================================ +// Query Types (frontend → backend) +// ============================================ + +export interface SearchRequest { + /** Text query. 
Required for managed embedding indexes. */ + queryText?: string; + /** Pre-computed embedding vector. Required for self-managed indexes without embeddingFn. */ + queryVector?: number[]; + /** Override default columns for this query */ + columns?: string[]; + /** Override default numResults for this query */ + numResults?: number; + /** Override default queryType for this query */ + queryType?: 'ann' | 'hybrid' | 'full_text'; + /** Metadata filters */ + filters?: SearchFilters; + /** Override reranker for this query */ + reranker?: boolean; +} + +/** + * Filters use the VS REST API filter format. + * Keys are column names with optional operators. + * + * Examples: + * { category: ['electronics', 'books'] } // IN list + * { 'price >=': 10 } // comparison + * { 'title NOT': 'test' } // NOT + * { 'name LIKE': 'data%' } // LIKE + * { 'color1 OR color2': ['red', 'blue'] } // OR across columns + */ +export type SearchFilters = Record; + +// ============================================ +// Result Types (backend → frontend) +// ============================================ + +export interface SearchResponse = Record> { + /** Search results */ + results: SearchResult[]; + /** Total number of results */ + totalCount: number; + /** Query execution time in ms (from VS debug info) */ + queryTimeMs: number; + /** The query type that was actually used */ + queryType: 'ann' | 'hybrid' | 'full_text'; + /** Whether results were served from cache */ + fromCache: boolean; + /** Token for fetching next page. Null if no more results. 
*/ + nextPageToken: string | null; +} + +export interface SearchResult = Record> { + /** Similarity score (0-1, higher = more similar) */ + score: number; + /** The result data — keys match the columns requested */ + data: T; +} + +// ============================================ +// Error Types +// ============================================ + +export interface SearchError { + code: 'UNAUTHORIZED' | 'INDEX_NOT_FOUND' | 'INVALID_QUERY' | 'RATE_LIMITED' | 'INTERNAL'; + message: string; + /** HTTP status from VS API */ + statusCode: number; +} + +// ============================================ +// Hook Types +// ============================================ + +export interface UseVectorSearchOptions { + /** Debounce delay in ms. Default: 300 */ + debounceMs?: number; + /** Override default numResults from server config */ + numResults?: number; + /** Override default queryType from server config */ + queryType?: 'ann' | 'hybrid' | 'full_text'; + /** Override reranker from server config */ + reranker?: boolean; + /** Initial filters */ + initialFilters?: SearchFilters; + /** Callback when search completes */ + onResults?: (response: SearchResponse) => void; + /** Callback on error */ + onError?: (error: SearchError) => void; + /** Minimum query length before searching. 
Default: 1 */ + minQueryLength?: number; +} + +export interface UseVectorSearchReturn = Record> { + /** Execute a search */ + search: (query: string) => void; + /** Current results */ + results: SearchResult[]; + /** Whether a search is in flight */ + isLoading: boolean; + /** Error from the last search, if any */ + error: SearchError | null; + /** Total result count */ + totalCount: number; + /** Query time in ms */ + queryTimeMs: number; + /** Whether results came from cache */ + fromCache: boolean; + /** Current query text */ + query: string; + /** Set filters programmatically */ + setFilters: (filters: SearchFilters) => void; + /** Current active filters */ + activeFilters: SearchFilters; + /** Clear all filters and results */ + clear: () => void; + /** Whether more results are available (pagination) */ + hasMore?: boolean; + /** Fetch next page and append to results (pagination) */ + loadMore?: () => void; + /** Whether a loadMore is in flight (pagination) */ + isLoadingMore?: boolean; +} + +// ============================================ +// Internal Types (not exported from package) +// ============================================ + +/** Raw response from VS REST API */ +export interface VsRawResponse { + manifest: { + column_count: number; + columns: Array<{ name: string; type?: string }>; + }; + result: { + row_count: number; + data_array: unknown[][]; + }; + next_page_token?: string | null; + debug_info?: { + response_time?: number; + ann_time?: number; + embedding_gen_time?: number; + latency_ms?: number; + [key: string]: unknown; + }; +} + +/** Token provider interface for auth */ +export interface TokenProvider { + getToken(): Promise; +} diff --git a/packages/vector-search/src/ui/components/SearchBox.tsx b/packages/vector-search/src/ui/components/SearchBox.tsx new file mode 100644 index 00000000..dc9c83bd --- /dev/null +++ b/packages/vector-search/src/ui/components/SearchBox.tsx @@ -0,0 +1,73 @@ +import * as React from 'react'; + +interface 
SearchBoxProps { + onSearch: (query: string) => void; + value?: string; + placeholder?: string; + isLoading?: boolean; + autoFocus?: boolean; + className?: string; +} + +export function SearchBox({ + onSearch, + value, + placeholder = 'Search...', + isLoading = false, + autoFocus = false, + className, +}: SearchBoxProps) { + const [internalValue, setInternalValue] = React.useState(''); + const displayValue = value ?? internalValue; + const inputRef = React.useRef(null); + + const handleChange = (e: React.ChangeEvent) => { + const val = e.target.value; + if (value === undefined) setInternalValue(val); + onSearch(val); + }; + + const handleClear = () => { + if (value === undefined) setInternalValue(''); + onSearch(''); + inputRef.current?.focus(); + }; + + const handleKeyDown = (e: React.KeyboardEvent) => { + if (e.key === 'Escape') handleClear(); + }; + + return ( +
+ + + + + {isLoading && ( +
+ )} + {displayValue && !isLoading && ( + + )} +
+ ); +} diff --git a/packages/vector-search/src/ui/components/SearchLoadMore.tsx b/packages/vector-search/src/ui/components/SearchLoadMore.tsx new file mode 100644 index 00000000..a351c247 --- /dev/null +++ b/packages/vector-search/src/ui/components/SearchLoadMore.tsx @@ -0,0 +1,24 @@ +import * as React from 'react'; + +interface SearchLoadMoreProps { + hasMore: boolean; + isLoading: boolean; + onLoadMore: () => void; + className?: string; +} + +export function SearchLoadMore({ hasMore, isLoading, onLoadMore, className }: SearchLoadMoreProps) { + if (!hasMore) return null; + + return ( +
+ +
+ ); +} diff --git a/packages/vector-search/src/ui/components/SearchResultCard.tsx b/packages/vector-search/src/ui/components/SearchResultCard.tsx new file mode 100644 index 00000000..630e6b31 --- /dev/null +++ b/packages/vector-search/src/ui/components/SearchResultCard.tsx @@ -0,0 +1,80 @@ +import * as React from 'react'; +import type { SearchResult } from '../../plugin/types'; + +/** Escapes regular-expression metacharacters so that str is matched literally. */ +function escapeRegex(str: string): string { + return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +interface SearchResultCardProps> { + result: SearchResult; + titleColumn?: keyof T; + descriptionColumn?: keyof T; + displayColumns?: (keyof T)[]; + showScore?: boolean; + query?: string; +} + +/** Renders one search result: optional title and description with query-word highlighting, extra display columns and an optional score. */ +export function SearchResultCard>({ + result, + titleColumn, + descriptionColumn, + displayColumns, + showScore = false, + query, +}: SearchResultCardProps) { + const title = titleColumn ? String(result.data[titleColumn] ?? '') : undefined; + const description = descriptionColumn ? String(result.data[descriptionColumn] ?? '') : undefined; + + const highlight = (text: string): React.ReactNode => { + if (!query) return text; + const words = query.split(/\s+/).filter(w => w.length > 0); + if (words.length === 0) return text; + /* No g flag: split() does not need it, and a global regex makes test()/exec() depend on lastIndex. */ + const regex = new RegExp(`(${words.map(escapeRegex).join('|')})`, 'i'); + const parts = text.split(regex); + /* split() with one capture group yields [text, match, text, match, ...], so matches sit at odd indices. */ + return parts.map((part, i) => + i % 2 === 1 + ? {part} + : part + ); + }; + + return ( +
+
+
+ {title && ( +

+ {highlight(title)} +

+ )} + {description && ( +

+ {highlight(description)} +

+ )} + {displayColumns && ( +
+ {displayColumns + .filter(col => col !== titleColumn && col !== descriptionColumn) + .map(col => ( + + {String(col)}:{' '} + {String(result.data[col] ?? '—')} + + ))} +
+ )} +
+ {showScore && ( + + {(result.score * 100).toFixed(0)}% + + )} +
+
+ ); +} diff --git a/packages/vector-search/src/ui/components/SearchResults.tsx b/packages/vector-search/src/ui/components/SearchResults.tsx new file mode 100644 index 00000000..64d5fc9d --- /dev/null +++ b/packages/vector-search/src/ui/components/SearchResults.tsx @@ -0,0 +1,93 @@ +import * as React from 'react'; +import type { SearchResult, SearchError } from '../../plugin/types'; +import { SearchResultCard } from './SearchResultCard'; + +interface SearchResultsProps> { + results: SearchResult[]; + isLoading: boolean; + error: SearchError | null; + query: string; + totalCount: number; + queryTimeMs: number; + renderResult?: (result: SearchResult, index: number) => React.ReactNode; + displayColumns?: (keyof T)[]; + titleColumn?: keyof T; + descriptionColumn?: keyof T; + showScores?: boolean; + emptyMessage?: string; + className?: string; +} + +export function SearchResults>({ + results, + isLoading, + error, + query, + totalCount, + queryTimeMs, + renderResult, + displayColumns, + titleColumn, + descriptionColumn, + showScores = false, + emptyMessage = 'No results found.', + className, +}: SearchResultsProps) { + if (error) { + return ( +
+

Search failed

+

{error.message}

+
+ ); + } + + if (isLoading && results.length === 0) { + return ( +
+ {Array.from({ length: 3 }).map((_, i) => ( +
+
+
+
+
+ ))} +
+ ); + } + + if (!query) return null; + + if (results.length === 0) { + return ( +
+ {emptyMessage} +
+ ); + } + + return ( +
+
+ {totalCount} result{totalCount !== 1 ? 's' : ''} in {queryTimeMs}ms +
+
+ {results.map((result, index) => + renderResult + ? renderResult(result, index) + : ( + + ) + )} +
+
+  );
+}
diff --git a/packages/vector-search/src/ui/hooks/useVectorSearch.ts b/packages/vector-search/src/ui/hooks/useVectorSearch.ts
new file mode 100644
index 00000000..ad43d194
--- /dev/null
+++ b/packages/vector-search/src/ui/hooks/useVectorSearch.ts
@@ -0,0 +1,256 @@
+import { useState, useCallback, useRef, useEffect } from 'react';
+import type {
+  SearchResult,
+  SearchResponse,
+  SearchError,
+  SearchFilters,
+  UseVectorSearchOptions,
+  UseVectorSearchReturn,
+} from '../../plugin/types';
+
+/** Error codes for HTTP statuses, used when an error response has no SearchError body. */
+const STATUS_TO_CODE: Partial<Record<number, SearchError['code']>> = {
+  400: 'INVALID_QUERY',
+  401: 'UNAUTHORIZED',
+  404: 'INDEX_NOT_FOUND',
+  429: 'RATE_LIMITED',
+};
+
+/** Type guard: true when `value` has a string `code` and a string `message`. */
+function isSearchError(value: unknown): value is SearchError {
+  if (typeof value !== 'object' || value === null) return false;
+  const candidate = value as Partial<SearchError>;
+  return typeof candidate.code === 'string' && typeof candidate.message === 'string';
+}
+
+/**
+ * Builds a SearchError from a non-OK response. The JSON body is used when it already
+ * has the SearchError shape; otherwise (non-JSON or unexpected body) the error is
+ * derived from the HTTP status so callers always receive `code` and `statusCode`.
+ */
+async function toSearchError(response: Response): Promise<SearchError> {
+  const body: unknown = await response.json().catch(() => null);
+  if (isSearchError(body)) return body;
+  return {
+    code: STATUS_TO_CODE[response.status] ?? 'INTERNAL',
+    message: `Search request failed with status ${response.status}`,
+    statusCode: response.status,
+  };
+}
+
+/**
+ * React hook that queries a Vector Search index through the plugin's backend routes.
+ *
+ * `search()` is debounced. Starting a request aborts the previous one, and only the
+ * most recent request may update state, so an aborted request can neither overwrite
+ * newer results nor clear the loading flags of the request that replaced it.
+ *
+ * @param alias - Index alias; becomes the path segment in `/api/vector-search/{alias}/...`.
+ * @param options - Optional overrides and callbacks (see UseVectorSearchOptions).
+ * @returns Search state plus the `search`, `setFilters`, `loadMore` and `clear` actions.
+ */
+export function useVectorSearch<T extends Record<string, unknown> = Record<string, unknown>>(
+  alias: string,
+  options: UseVectorSearchOptions = {},
+): UseVectorSearchReturn<T> {
+  const {
+    debounceMs = 300,
+    numResults,
+    queryType,
+    reranker,
+    initialFilters = {},
+    onResults,
+    onError,
+    minQueryLength = 1,
+  } = options;
+
+  const [results, setResults] = useState<SearchResult<T>[]>([]);
+  const [isLoading, setIsLoading] = useState(false);
+  const [error, setError] = useState<SearchError | null>(null);
+  const [query, setQuery] = useState('');
+  const [totalCount, setTotalCount] = useState(0);
+  const [queryTimeMs, setQueryTimeMs] = useState(0);
+  const [fromCache, setFromCache] = useState(false);
+  const [activeFilters, setActiveFilters] = useState<SearchFilters>(initialFilters);
+  const [hasMore, setHasMore] = useState(false);
+  const [isLoadingMore, setIsLoadingMore] = useState(false);
+
+  const nextPageTokenRef = useRef<string | null>(null);
+  const abortRef = useRef<AbortController | null>(null);
+  const debounceRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+
+  /** Cancels the pending debounce timer and aborts the in-flight request, if any. */
+  const cancelPending = useCallback(() => {
+    if (debounceRef.current) {
+      clearTimeout(debounceRef.current);
+      debounceRef.current = null;
+    }
+    if (abortRef.current) {
+      abortRef.current.abort();
+      abortRef.current = null;
+    }
+  }, []);
+
+  const executeSearch = useCallback(async (
+    searchQuery: string,
+    filters: SearchFilters,
+    isLoadMore = false,
+  ) => {
+    // A new request supersedes whatever is still in flight.
+    abortRef.current?.abort();
+    const controller = new AbortController();
+    abortRef.current = controller;
+
+    // Set both flags explicitly: a superseded request no longer resets them.
+    setIsLoading(!isLoadMore);
+    setIsLoadingMore(isLoadMore);
+    if (!isLoadMore) setError(null);
+
+    try {
+      const basePath = `/api/vector-search/${encodeURIComponent(alias)}`;
+      const url = isLoadMore ? `${basePath}/next-page` : `${basePath}/query`;
+
+      const body: Record<string, unknown> = isLoadMore
+        ? { pageToken: nextPageTokenRef.current }
+        : {
+            queryText: searchQuery,
+            ...(Object.keys(filters).length > 0 ? { filters } : {}),
+            ...(numResults !== undefined ? { numResults } : {}),
+            ...(queryType !== undefined ? { queryType } : {}),
+            ...(reranker !== undefined ? { reranker } : {}),
+          };
+
+      const response = await fetch(url, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify(body),
+        signal: controller.signal,
+      });
+
+      if (!response.ok) {
+        throw await toSearchError(response);
+      }
+
+      const data: SearchResponse<T> = await response.json();
+
+      // Cancelled while the body was being read: drop the stale payload.
+      if (controller.signal.aborted) return;
+
+      if (isLoadMore) {
+        setResults(prev => [...prev, ...data.results]);
+      } else {
+        setResults(data.results);
+      }
+
+      setTotalCount(data.totalCount);
+      setQueryTimeMs(data.queryTimeMs);
+      setFromCache(data.fromCache);
+      setHasMore(!!data.nextPageToken);
+      nextPageTokenRef.current = data.nextPageToken;
+
+      onResults?.(data as SearchResponse);
+    } catch (err: unknown) {
+      const aborted =
+        controller.signal.aborted ||
+        (err instanceof DOMException && err.name === 'AbortError');
+      if (aborted) return;
+
+      // Network failures and JSON parse errors are not SearchErrors; normalize them.
+      const searchError: SearchError = isSearchError(err)
+        ? err
+        : {
+            code: 'INTERNAL',
+            message: err instanceof Error ? err.message : 'Search request failed',
+            statusCode: 0,
+          };
+      setError(searchError);
+      onError?.(searchError);
+    } finally {
+      // Only the latest request owns the loading flags; an aborted predecessor must
+      // not switch them off while its replacement is still running.
+      if (abortRef.current === controller) {
+        abortRef.current = null;
+        setIsLoading(false);
+        setIsLoadingMore(false);
+      }
+    }
+  }, [alias, numResults, queryType, reranker, onResults, onError]);
+
+  const search = useCallback((searchQuery: string) => {
+    setQuery(searchQuery);
+
+    if (searchQuery.length < minQueryLength) {
+      // Also abort the in-flight request so a late response cannot repopulate
+      // the results that are being cleared here.
+      cancelPending();
+      setResults([]);
+      setTotalCount(0);
+      setHasMore(false);
+      setIsLoading(false);
+      setIsLoadingMore(false);
+      nextPageTokenRef.current = null;
+      return;
+    }
+
+    if (debounceRef.current) clearTimeout(debounceRef.current);
+    debounceRef.current = setTimeout(() => {
+      debounceRef.current = null;
+      executeSearch(searchQuery, activeFilters);
+    }, debounceMs);
+  }, [debounceMs, minQueryLength, activeFilters, executeSearch, cancelPending]);
+
+  const setFilters = useCallback((filters: SearchFilters) => {
+    setActiveFilters(filters);
+    // Drop a pending debounced search: it captured the previous filters and would
+    // otherwise fire later and replace the request issued below.
+    if (debounceRef.current) {
+      clearTimeout(debounceRef.current);
+      debounceRef.current = null;
+    }
+    if (query.length >= minQueryLength) {
+      executeSearch(query, filters);
+    }
+  }, [query, minQueryLength, executeSearch]);
+
+  const loadMore = useCallback(() => {
+    if (hasMore && !isLoadingMore && nextPageTokenRef.current) {
+      executeSearch(query, activeFilters, true);
+    }
+  }, [hasMore, isLoadingMore, query, activeFilters, executeSearch]);
+
+  const clear = useCallback(() => {
+    cancelPending();
+    setQuery('');
+    setResults([]);
+    setError(null);
+    setTotalCount(0);
+    setQueryTimeMs(0);
+    setFromCache(false);
+    setHasMore(false);
+    setIsLoading(false);
+    setIsLoadingMore(false);
+    nextPageTokenRef.current = null;
+    // NOTE(review): UseVectorSearchReturn documents clear() as clearing filters too,
+    // but activeFilters has never been reset here — confirm which one is intended.
+  }, [cancelPending]);
+
+  // Cancel outstanding work on unmount.
+  useEffect(() => cancelPending, [cancelPending]);
+
+  return {
+    search,
+    results,
+    isLoading,
+    error,
+    totalCount,
+    queryTimeMs,
+    fromCache,
+    query,
+    setFilters,
+    activeFilters,
+    clear,
+    hasMore,
+    loadMore,
+    isLoadingMore,
+  };
+}
diff --git a/packages/vector-search/src/ui/index.ts b/packages/vector-search/src/ui/index.ts
new file mode 100644
index 00000000..47797d97
--- /dev/null
+++ b/packages/vector-search/src/ui/index.ts
@@ -0,0 +1,6 @@
+export { useVectorSearch } from './hooks/useVectorSearch';
+export { SearchBox } from './components/SearchBox';
+export { SearchResults } from './components/SearchResults';
+export { SearchResultCard } from './components/SearchResultCard';
+export { SearchLoadMore } from './components/SearchLoadMore';
+export type { UseVectorSearchOptions, UseVectorSearchReturn } from '../plugin/types';
diff --git a/packages/vector-search/tests/integration/dogfood.test.ts b/packages/vector-search/tests/integration/dogfood.test.ts
new file mode 100644
index 00000000..f0aa017e
--- /dev/null
+++ b/packages/vector-search/tests/integration/dogfood.test.ts
@@ -0,0 +1,97 @@
+import { describe, it, expect, beforeAll } from 'vitest';
+import { VectorSearchClient } from '../../src/plugin/VectorSearchClient';
+
+const DOGFOOD_HOST = 'e2-dogfood.staging.cloud.databricks.com';
+const TEST_INDEX =
'gurary_catalog.vector-search-brickfood.retrieval_perf_cuj_index_1'; + +// Skip unless DOGFOOD_TOKEN is set +describe.skipIf(!process.env.DOGFOOD_TOKEN)('Integration: VectorSearchClient → dogfood', () => { + let client: VectorSearchClient; + + beforeAll(() => { + client = new VectorSearchClient({ + host: DOGFOOD_HOST, + tokenProvider: { + getToken: async () => process.env.DOGFOOD_TOKEN!, + }, + }); + }); + + it('returns results for a valid hybrid query', async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'aircraft instruments', + columns: ['chunk_id', 'text'], + numResults: 5, + queryType: 'hybrid', + }); + expect(response.results.length).toBeGreaterThan(0); + expect(response.results[0].score).toBeGreaterThan(0); + expect(response.results[0].data).toHaveProperty('text'); + expect(response.results[0].data).toHaveProperty('chunk_id'); + expect(response.queryTimeMs).toBeGreaterThan(0); + }, 30000); + + it('returns results for ANN query', async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'navigation systems', + columns: ['chunk_id', 'text'], + numResults: 3, + queryType: 'ann', + }); + expect(response.results.length).toBeGreaterThan(0); + expect(response.results[0].score).toBeGreaterThan(0); + }, 30000); + + it('respects numResults limit', async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'flight', + columns: ['chunk_id', 'text'], + numResults: 2, + queryType: 'hybrid', + }); + expect(response.results.length).toBeLessThanOrEqual(2); + }, 30000); + + it('returns scores between 0 and 1', async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'altitude', + columns: ['chunk_id', 'text'], + numResults: 5, + queryType: 'hybrid', + }); + response.results.forEach(r => { + expect(r.score).toBeGreaterThanOrEqual(0); + expect(r.score).toBeLessThanOrEqual(1); + }); + }, 30000); + + it('handles empty results gracefully', 
async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'xyzzy_absolutely_no_match_12345_qwerty', + columns: ['chunk_id', 'text'], + numResults: 5, + queryType: 'ann', + }); + // May still return results due to embedding similarity, but should have low scores + // If no results, that's fine too + expect(response.results).toBeDefined(); + expect(Array.isArray(response.results)).toBe(true); + }, 30000); + + it('response includes queryTimeMs from debug_info', async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'weather radar', + columns: ['chunk_id', 'text'], + numResults: 3, + queryType: 'hybrid', + }); + expect(response.queryTimeMs).toBeGreaterThan(0); + expect(response.fromCache).toBe(false); + }, 30000); +}); diff --git a/packages/vector-search/tests/plugin/VectorSearchClient.test.ts b/packages/vector-search/tests/plugin/VectorSearchClient.test.ts new file mode 100644 index 00000000..8335a4f5 --- /dev/null +++ b/packages/vector-search/tests/plugin/VectorSearchClient.test.ts @@ -0,0 +1,233 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { VectorSearchClient } from '../../src/plugin/VectorSearchClient'; + +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +const mockTokenProvider = { getToken: vi.fn().mockResolvedValue('sp-token-123') }; + +describe('VectorSearchClient', () => { + let client: VectorSearchClient; + + beforeEach(() => { + client = new VectorSearchClient({ + host: 'test-workspace.databricks.com', + tokenProvider: mockTokenProvider, + }); + mockFetch.mockReset(); + mockTokenProvider.getToken.mockClear(); + }); + + const validResponse = { + manifest: { column_count: 3, columns: [{ name: 'id' }, { name: 'title' }, { name: 'score' }] }, + result: { row_count: 2, data_array: [[1, 'ML Guide', 0.95], [2, 'AI Primer', 0.87]] }, + next_page_token: null, + debug_info: { response_time: 35 }, + }; + + describe('query()', () => { + it('constructs 
correct REST API URL and request body for hybrid search', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'machine learning', + columns: ['id', 'title'], numResults: 10, queryType: 'hybrid', + }); + const [url, opts] = mockFetch.mock.calls[0]; + expect(url).toBe('https://test-workspace.databricks.com/api/2.0/vector-search/indexes/cat.sch.idx/query'); + const body = JSON.parse(opts.body); + expect(body.query_text).toBe('machine learning'); + expect(body.query_type).toBe('HYBRID'); + expect(body.num_results).toBe(10); + expect(body.columns).toEqual(['id', 'title']); + expect(body.debug_level).toBe(1); + }); + + it('includes filters when provided', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], + numResults: 5, queryType: 'ann', filters: { category: ['books'] }, + }); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.filters).toEqual({ category: ['books'] }); + }); + + it('omits filters when empty object', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], + numResults: 5, queryType: 'ann', filters: {}, + }); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.filters).toBeUndefined(); + }); + + it('includes reranker config when boolean true', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 5, queryType: 'hybrid', reranker: true, + }); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.reranker.model).toBe('databricks_reranker'); + // Default: all 
non-id columns + expect(body.reranker.parameters.columns_to_rerank).toEqual(['title']); + }); + + it('includes custom reranker columnsToRerank', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title', 'desc'], + numResults: 5, queryType: 'hybrid', reranker: { columnsToRerank: ['desc'] }, + }); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.reranker.parameters.columns_to_rerank).toEqual(['desc']); + }); + + it('parses VS data_array response into typed SearchResult[]', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + const result = await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 10, queryType: 'hybrid', + }); + expect(result.results).toHaveLength(2); + expect(result.results[0].score).toBe(0.95); + expect(result.results[0].data).toEqual({ id: 1, title: 'ML Guide' }); + expect(result.results[1].score).toBe(0.87); + expect(result.results[1].data).toEqual({ id: 2, title: 'AI Primer' }); + expect(result.totalCount).toBe(2); + expect(result.queryTimeMs).toBe(35); + expect(result.fromCache).toBe(false); + expect(result.nextPageToken).toBeNull(); + }); + + it('handles next_page_token in response', async () => { + const responseWithToken = { ...validResponse, next_page_token: 'abc123' }; + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(responseWithToken) }); + const result = await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 10, queryType: 'hybrid', + }); + expect(result.nextPageToken).toBe('abc123'); + }); + + it('uses SP token when no userToken provided', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 
'test', columns: ['id'], + numResults: 5, queryType: 'ann', + }); + expect(mockTokenProvider.getToken).toHaveBeenCalled(); + expect(mockFetch.mock.calls[0][1].headers['Authorization']).toBe('Bearer sp-token-123'); + }); + + it('uses userToken when provided (OBO)', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], + numResults: 5, queryType: 'ann', userToken: 'user-token-456', + }); + expect(mockTokenProvider.getToken).not.toHaveBeenCalled(); + expect(mockFetch.mock.calls[0][1].headers['Authorization']).toBe('Bearer user-token-456'); + }); + + it('calls embeddingFn and sends query_vector for self-managed indexes', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + const mockEmbeddingFn = vi.fn().mockResolvedValue([0.1, 0.2, 0.3]); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 5, queryType: 'ann', embeddingFn: mockEmbeddingFn, + }); + expect(mockEmbeddingFn).toHaveBeenCalledWith('test'); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.query_vector).toEqual([0.1, 0.2, 0.3]); + expect(body.query_text).toBeUndefined(); + }); + + it('sends query_text when no embeddingFn (managed embeddings)', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], + numResults: 5, queryType: 'ann', + }); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.query_text).toBe('test'); + expect(body.query_vector).toBeUndefined(); + }); + + it('throws INVALID_QUERY when neither queryText nor queryVector provided', async () => { + await expect(client.query({ + indexName: 'x', columns: ['id'], numResults: 1, queryType: 'ann', + } as any)).rejects.toMatchObject({ 
code: 'INVALID_QUERY' }); + }); + + it('maps 401 → UNAUTHORIZED', async () => { + mockFetch.mockResolvedValue({ ok: false, status: 401 }); + await expect(client.query({ + indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', + })).rejects.toMatchObject({ code: 'UNAUTHORIZED', statusCode: 401 }); + }); + + it('maps 404 → INDEX_NOT_FOUND', async () => { + mockFetch.mockResolvedValue({ ok: false, status: 404 }); + await expect(client.query({ + indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', + })).rejects.toMatchObject({ code: 'INDEX_NOT_FOUND', statusCode: 404 }); + }); + + it('maps 429 → RATE_LIMITED and retries', async () => { + mockFetch + .mockResolvedValueOnce({ ok: false, status: 429 }) + .mockResolvedValueOnce({ ok: true, json: () => Promise.resolve(validResponse) }); + const result = await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 5, queryType: 'ann', + }); + expect(mockFetch).toHaveBeenCalledTimes(2); + expect(result.results).toHaveLength(2); + }); + + it('does not retry 400 errors', async () => { + mockFetch.mockResolvedValue({ ok: false, status: 400 }); + await expect(client.query({ + indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', + })).rejects.toMatchObject({ code: 'INVALID_QUERY' }); + expect(mockFetch).toHaveBeenCalledTimes(1); + }); + + it('retries 500 errors up to 3 times', async () => { + mockFetch.mockResolvedValue({ ok: false, status: 500 }); + await expect(client.query({ + indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', + })).rejects.toMatchObject({ code: 'INTERNAL', statusCode: 500 }); + expect(mockFetch).toHaveBeenCalledTimes(4); // 1 initial + 3 retries + }); + + it('retries network errors', async () => { + mockFetch + .mockRejectedValueOnce(new Error('ECONNRESET')) + .mockResolvedValueOnce({ ok: true, json: () => Promise.resolve(validResponse) }); + const 
result = await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 5, queryType: 'ann', + }); + expect(mockFetch).toHaveBeenCalledTimes(2); + expect(result.results).toHaveLength(2); + }); + }); + + describe('queryNextPage()', () => { + it('calls the query-next-page endpoint with page token', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.queryNextPage({ + indexName: 'cat.sch.idx', endpointName: 'my-endpoint', + pageToken: 'token123', + }); + const [url, opts] = mockFetch.mock.calls[0]; + expect(url).toBe('https://test-workspace.databricks.com/api/2.0/vector-search/indexes/cat.sch.idx/query-next-page'); + const body = JSON.parse(opts.body); + expect(body.endpoint_name).toBe('my-endpoint'); + expect(body.page_token).toBe('token123'); + }); + }); +}); diff --git a/packages/vector-search/tests/plugin/VectorSearchPlugin.test.ts b/packages/vector-search/tests/plugin/VectorSearchPlugin.test.ts new file mode 100644 index 00000000..479723fa --- /dev/null +++ b/packages/vector-search/tests/plugin/VectorSearchPlugin.test.ts @@ -0,0 +1,115 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { VectorSearchPlugin } from '../../src/plugin/VectorSearchPlugin'; + +describe('VectorSearchPlugin', () => { + beforeEach(() => { + vi.stubEnv('DATABRICKS_HOST', 'test-host.databricks.com'); + vi.stubEnv('DATABRICKS_CLIENT_ID', 'test-client'); + vi.stubEnv('DATABRICKS_CLIENT_SECRET', 'test-secret'); + }); + + describe('setup()', () => { + it('throws if DATABRICKS_HOST is not set', async () => { + vi.stubEnv('DATABRICKS_HOST', ''); + const plugin = new VectorSearchPlugin({ + indexes: { + test: { indexName: 'cat.sch.idx', columns: ['id'] }, + }, + }); + await expect(plugin.setup()).rejects.toThrow('DATABRICKS_HOST'); + }); + + it('throws if any index is missing indexName', async () => { + const plugin = new VectorSearchPlugin({ + indexes: { + test: { 
indexName: '', columns: ['id'] }, + }, + }); + await expect(plugin.setup()).rejects.toThrow('indexName'); + }); + + it('throws if any index is missing columns', async () => { + const plugin = new VectorSearchPlugin({ + indexes: { + test: { indexName: 'cat.sch.idx', columns: [] }, + }, + }); + await expect(plugin.setup()).rejects.toThrow('columns'); + }); + + it('throws if pagination enabled but no endpointName', async () => { + const plugin = new VectorSearchPlugin({ + indexes: { + test: { indexName: 'cat.sch.idx', columns: ['id'], pagination: true }, + }, + }); + await expect(plugin.setup()).rejects.toThrow('endpointName'); + }); + + it('succeeds with valid config', async () => { + const plugin = new VectorSearchPlugin({ + indexes: { + products: { + indexName: 'cat.sch.products_idx', + columns: ['id', 'name', 'description'], + queryType: 'hybrid', + numResults: 20, + }, + docs: { + indexName: 'cat.sch.docs_idx', + columns: ['id', 'title', 'content'], + reranker: true, + auth: 'on-behalf-of-user', + }, + }, + }); + await expect(plugin.setup()).resolves.not.toThrow(); + }); + }); + + describe('exports()', () => { + it('returns object with query function', async () => { + const plugin = new VectorSearchPlugin({ + indexes: { + test: { indexName: 'cat.sch.idx', columns: ['id'] }, + }, + }); + await plugin.setup(); + const exports = plugin.exports(); + expect(exports).toHaveProperty('query'); + expect(typeof exports.query).toBe('function'); + }); + }); + + describe('getResourceRequirements()', () => { + it('returns resource entry for each configured index', () => { + const plugin = new VectorSearchPlugin({ + indexes: { + products: { indexName: 'cat.sch.products', columns: ['id'] }, + docs: { indexName: 'cat.sch.docs', columns: ['id'] }, + }, + }); + const resources = plugin.getResourceRequirements(); + expect(resources).toHaveLength(2); + expect(resources[0]).toEqual({ + type: 'vector-search-index', + name: 'cat.sch.products', + permission: 'SELECT', + }); + 
expect(resources[1]).toEqual({
+        type: 'vector-search-index',
+        name: 'cat.sch.docs',
+        permission: 'SELECT',
+      });
+    });
+  });
+
+  describe('manifest', () => {
+    it('has correct name and env declarations', () => {
+      expect(VectorSearchPlugin.manifest.name).toBe('vector-search');
+      expect(VectorSearchPlugin.manifest.env).toContainEqual(
+        expect.objectContaining({ name: 'DATABRICKS_HOST' })
+      );
+    });
+  });
+});
diff --git a/packages/vector-search/tests/plugin/auth.test.ts b/packages/vector-search/tests/plugin/auth.test.ts
new file mode 100644
index 00000000..925b4ef4
--- /dev/null
+++ b/packages/vector-search/tests/plugin/auth.test.ts
@@ -0,0 +1,119 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { ServicePrincipalTokenProvider, OboTokenExtractor } from '../../src/plugin/auth';
+
+// Every outbound HTTP call in this file is answered by this mock, never the network.
+const mockFetch = vi.fn();
+vi.stubGlobal('fetch', mockFetch);
+
+/** Runs `fn` and returns whatever it throws; fails the test if `fn` returns normally. */
+function captureThrown(fn: () => unknown): any {
+  try {
+    fn();
+  } catch (err) {
+    return err;
+  }
+  throw new Error('expected function to throw');
+}
+
+describe('ServicePrincipalTokenProvider', () => {
+  let provider: ServicePrincipalTokenProvider;
+
+  beforeEach(() => {
+    vi.stubEnv('DATABRICKS_CLIENT_ID', 'test-client-id');
+    vi.stubEnv('DATABRICKS_CLIENT_SECRET', 'test-client-secret');
+    provider = new ServicePrincipalTokenProvider('test-host.databricks.com');
+    mockFetch.mockReset();
+  });
+
+  afterEach(() => {
+    // Restore the real clock here (not in beforeEach) so the fake timers installed
+    // by the refresh test cannot leak into suites that run after this one.
+    vi.useRealTimers();
+    vi.unstubAllEnvs();
+  });
+
+  it('fetches token from OIDC endpoint', async () => {
+    mockFetch.mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve({ access_token: 'token-abc', expires_in: 3600 }),
+    });
+
+    const token = await provider.getToken();
+
+    expect(token).toBe('token-abc');
+    // Inspect the recorded request: URL, verb, and the form-encoded grant fields.
+    const [url, opts] = mockFetch.mock.calls[0];
+    expect(url).toBe('https://test-host.databricks.com/oidc/v1/token');
+    expect(opts.method).toBe('POST');
+    expect(opts.headers['Content-Type']).toBe('application/x-www-form-urlencoded');
+    const body = new URLSearchParams(opts.body);
+    expect(body.get('grant_type')).toBe('client_credentials');
+    expect(body.get('client_id')).toBe('test-client-id');
+    expect(body.get('client_secret')).toBe('test-client-secret');
+    expect(body.get('scope')).toBe('all-apis');
+  });
+
+  it('returns cached token on subsequent calls within expiry', async () => {
+    mockFetch.mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve({ access_token: 'token-abc', expires_in: 3600 }),
+    });
+
+    await provider.getToken();
+    await provider.getToken();
+
+    expect(mockFetch).toHaveBeenCalledTimes(1);
+  });
+
+  it('refreshes token when within 2-minute expiry buffer', async () => {
+    vi.useFakeTimers();
+
+    mockFetch
+      .mockResolvedValueOnce({
+        ok: true,
+        json: () => Promise.resolve({ access_token: 'token-1', expires_in: 3600 }),
+      })
+      .mockResolvedValueOnce({
+        ok: true,
+        json: () => Promise.resolve({ access_token: 'token-2', expires_in: 3600 }),
+      });
+
+    const token1 = await provider.getToken();
+    expect(token1).toBe('token-1');
+
+    // Advance to within 2 minutes of expiry (3600s - 120s = 3480s)
+    vi.advanceTimersByTime(3481 * 1000);
+
+    const token2 = await provider.getToken();
+    expect(token2).toBe('token-2');
+    expect(mockFetch).toHaveBeenCalledTimes(2);
+  });
+});
+
+describe('OboTokenExtractor', () => {
+  it('extracts token from x-forwarded-access-token header', () => {
+    const req = {
+      headers: { 'x-forwarded-access-token': 'user-token-xyz' },
+    } as any;
+
+    const token = OboTokenExtractor.extractFromRequest(req);
+    expect(token).toBe('user-token-xyz');
+  });
+
+  it('throws UNAUTHORIZED when header is missing', () => {
+    const req = { headers: {} } as any;
+
+    const err = captureThrown(() => OboTokenExtractor.extractFromRequest(req));
+    expect(err.code).toBe('UNAUTHORIZED');
+    expect(err.statusCode).toBe(401);
+  });
+
+  it('throws UNAUTHORIZED when header is empty string', () => {
+    const req = {
+      headers: { 'x-forwarded-access-token': '' },
+    } as any;
+
+    const err = captureThrown(() => OboTokenExtractor.extractFromRequest(req));
+    expect(err.code).toBe('UNAUTHORIZED');
+  });
+});
diff --git
a/packages/vector-search/tests/plugin/routes.test.ts b/packages/vector-search/tests/plugin/routes.test.ts new file mode 100644 index 00000000..ef043061 --- /dev/null +++ b/packages/vector-search/tests/plugin/routes.test.ts @@ -0,0 +1,206 @@ +import { describe, it, expect, vi, beforeEach, beforeAll } from 'vitest'; +import express from 'express'; +import request from 'supertest'; +import { createVectorSearchRouter } from '../../src/plugin/routes'; +import { VectorSearchPlugin } from '../../src/plugin/VectorSearchPlugin'; + +// Mock fetch for the VectorSearchClient +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +describe('Vector Search Routes', () => { /* Drives the router returned by createVectorSearchRouter through supertest; outbound fetch calls are answered by mockFetch. */ + let app: express.Express; + let plugin: VectorSearchPlugin; + + const validVsResponse = { /* canned upstream payload: columns id, title, score; two rows; latency_ms 35 */ + manifest: { column_count: 3, columns: [{ name: 'id' }, { name: 'title' }, { name: 'score' }] }, + result: { row_count: 2, data_array: [[1, 'ML Guide', 0.95], [2, 'AI Primer', 0.87]] }, + next_page_token: null, + debug_info: { latency_ms: 35 }, + }; + + beforeAll(async () => { /* one plugin and one Express app are shared by every test; aliases configured: products, cached, paginated, obo */ + vi.stubEnv('DATABRICKS_HOST', 'test-host.databricks.com'); + vi.stubEnv('DATABRICKS_CLIENT_ID', 'test-client'); + vi.stubEnv('DATABRICKS_CLIENT_SECRET', 'test-secret'); + + plugin = new VectorSearchPlugin({ + indexes: { + products: { + indexName: 'cat.sch.products', + columns: ['id', 'title', 'description', 'category'], + queryType: 'hybrid', + numResults: 20, + }, + cached: { + indexName: 'cat.sch.cached', + columns: ['id', 'text'], + cache: { enabled: true, ttlSeconds: 60 }, + }, + paginated: { + indexName: 'cat.sch.paginated', + columns: ['id', 'text'], + pagination: true, + endpointName: 'my-endpoint', + }, + obo: { + indexName: 'cat.sch.obo', + columns: ['id', 'text'], + auth: 'on-behalf-of-user', + }, + }, + }); + await plugin.setup(); + + app = express(); + app.use(express.json()); + app.use('/api/vector-search', createVectorSearchRouter(plugin)); + }); + + beforeEach(() => { + mockFetch.mockReset(); + // Mock the OIDC token
fetch that happens on first query + mockFetch.mockImplementation((url: string) => { + if (typeof url === 'string' && url.includes('/oidc/v1/token')) { + return Promise.resolve({ + ok: true, + json: () => Promise.resolve({ access_token: 'sp-token', expires_in: 3600 }), + }); + } + return Promise.resolve({ + ok: true, + json: () => Promise.resolve(validVsResponse), + }); + }); + }); + + describe('POST /:alias/query', () => { /* covers success, unknown alias, empty body, filter pass-through and on-behalf-of-user auth */ + it('returns results for valid query', async () => { + const res = await request(app) + .post('/api/vector-search/products/query') + .send({ queryText: 'machine learning' }) + .expect(200); + + expect(res.body.results).toHaveLength(2); + expect(res.body.results[0].score).toBe(0.95); + expect(res.body.results[0].data.title).toBe('ML Guide'); + expect(res.body.totalCount).toBe(2); + expect(res.body.queryTimeMs).toBe(35); + }); + + it('returns 404 for unknown alias', async () => { + const res = await request(app) + .post('/api/vector-search/unknown/query') + .send({ queryText: 'test' }) + .expect(404); + + expect(res.body.code).toBe('INDEX_NOT_FOUND'); + }); + + it('returns 400 for missing queryText and queryVector', async () => { + const res = await request(app) + .post('/api/vector-search/products/query') + .send({}) + .expect(400); + + expect(res.body.code).toBe('INVALID_QUERY'); + }); + + it('passes filters to VS client', async () => { + await request(app) + .post('/api/vector-search/products/query') + .send({ queryText: 'test', filters: { category: 'books' } }) + .expect(200); + + // Verify the VS API call included filters + const vsCall = mockFetch.mock.calls.find( + (c: any[]) => typeof c[0] === 'string' && c[0].includes('/query') + ); + expect(vsCall).toBeDefined(); + const body = JSON.parse(vsCall![1].body); + expect(body.filters).toEqual({ category: 'books' }); + }); + + it('uses OBO token when auth is on-behalf-of-user', async () => { + await request(app) + .post('/api/vector-search/obo/query') + .set('x-forwarded-access-token',
'user-token-123') + .send({ queryText: 'test' }) + .expect(200); + + const vsCall = mockFetch.mock.calls.find( + (c: any[]) => typeof c[0] === 'string' && c[0].includes('/query') + ); + expect(vsCall![1].headers['Authorization']).toBe('Bearer user-token-123'); + }); + + it('returns 401 when OBO index has no user token', async () => { + const res = await request(app) + .post('/api/vector-search/obo/query') + .send({ queryText: 'test' }) + .expect(401); + + expect(res.body.code).toBe('UNAUTHORIZED'); + }); + }); + + describe('POST /:alias/next-page', () => { /* the tests expect 400 INVALID_QUERY unless the alias has pagination enabled and the body carries a pageToken */ + it('returns 400 when pagination not enabled', async () => { + const res = await request(app) + .post('/api/vector-search/products/next-page') + .send({ pageToken: 'abc' }) + .expect(400); + + expect(res.body.code).toBe('INVALID_QUERY'); + expect(res.body.message).toContain('Pagination'); + }); + + it('returns 400 when pageToken missing', async () => { + const res = await request(app) + .post('/api/vector-search/paginated/next-page') + .send({}) + .expect(400); + + expect(res.body.code).toBe('INVALID_QUERY'); + expect(res.body.message).toContain('pageToken'); + }); + + it('calls query-next-page endpoint when valid', async () => { + await request(app) + .post('/api/vector-search/paginated/next-page') + .send({ pageToken: 'token123' }) + .expect(200); + + const nextPageCall = mockFetch.mock.calls.find( + (c: any[]) => typeof c[0] === 'string' && c[0].includes('/query-next-page') + ); + expect(nextPageCall).toBeDefined(); + const body = JSON.parse(nextPageCall![1].body); + expect(body.page_token).toBe('token123'); + expect(body.endpoint_name).toBe('my-endpoint'); + }); + }); + + describe('GET /:alias/config', () => { /* expects the alias name plus columns, queryType, numResults, reranker and pagination in the response body */ + it('returns public config for valid alias', async () => { + const res = await request(app) + .get('/api/vector-search/products/config') + .expect(200); + + expect(res.body.alias).toBe('products'); + expect(res.body.columns).toEqual(['id', 'title', 'description', 'category']); +
expect(res.body.queryType).toBe('hybrid'); + expect(res.body.numResults).toBe(20); + expect(res.body.reranker).toBe(false); + expect(res.body.pagination).toBe(false); + }); + + it('returns 404 for unknown alias', async () => { + const res = await request(app) + .get('/api/vector-search/unknown/config') + .expect(404); + + expect(res.body.code).toBe('INDEX_NOT_FOUND'); + }); + }); +}); diff --git a/packages/vector-search/tests/ui/components/components.test.tsx b/packages/vector-search/tests/ui/components/components.test.tsx new file mode 100644 index 00000000..681f8069 --- /dev/null +++ b/packages/vector-search/tests/ui/components/components.test.tsx @@ -0,0 +1,158 @@ +import { describe, it, expect, vi } from 'vitest'; +import { render, screen, fireEvent } from '@testing-library/react'; +import { SearchBox } from '../../../src/ui/components/SearchBox'; +import { SearchResultCard } from '../../../src/ui/components/SearchResultCard'; +import { SearchResults } from '../../../src/ui/components/SearchResults'; +import { SearchLoadMore } from '../../../src/ui/components/SearchLoadMore'; + +describe('SearchBox', () => { /* NOTE(review): the render(...) calls in this file have a missing or truncated JSX argument in this copy of the patch; the elements look stripped, so confirm against the original commit */ + it('renders input with placeholder', () => { + render( {}} placeholder="Search products..."
/>); + expect(screen.getByPlaceholderText('Search products...')).toBeInTheDocument(); + }); + + it('calls onSearch on input change', () => { + const onSearch = vi.fn(); + render(); + fireEvent.change(screen.getByRole('searchbox'), { target: { value: 'test' } }); + expect(onSearch).toHaveBeenCalledWith('test'); + }); + + it('shows clear button when value present', () => { + render( {}} value="test" />); + expect(screen.getByLabelText('Clear search')).toBeInTheDocument(); + }); + + it('hides clear button when value empty', () => { + render( {}} value="" />); + expect(screen.queryByLabelText('Clear search')).not.toBeInTheDocument(); + }); + + it('calls onSearch with empty string on clear', () => { + const onSearch = vi.fn(); + render(); + fireEvent.click(screen.getByLabelText('Clear search')); + expect(onSearch).toHaveBeenCalledWith(''); + }); + + it('clears on Escape key', () => { + const onSearch = vi.fn(); + render(); + fireEvent.keyDown(screen.getByRole('searchbox'), { key: 'Escape' }); + expect(onSearch).toHaveBeenCalledWith(''); + }); + + it('shows loading spinner when isLoading', () => { + render( {}} isLoading />); + expect(screen.getByTestId('loading-spinner')).toBeInTheDocument(); + }); +}); + +describe('SearchResultCard', () => { /* shared fixture below: one hit with score 0.95 and id, title, description, category fields. NOTE(review): the render(...) calls in this suite have no JSX argument in this copy of the patch; confirm against the original commit */ + const result = { + score: 0.95, + data: { id: 1, title: 'Machine Learning Guide', description: 'A guide to ML algorithms', category: 'books' }, + }; + + it('renders title and description', () => { + render(); + expect(screen.getByText('Machine Learning Guide')).toBeInTheDocument(); + expect(screen.getByText('A guide to ML algorithms')).toBeInTheDocument(); + }); + + it('highlights query words with mark tags', () => { + const { container } = render( + + ); + const marks = container.querySelectorAll('mark'); + expect(marks.length).toBeGreaterThan(0); + expect(marks[0].textContent).toBe('Machine'); + }); + + it('shows score badge when showScore is true', () => { + render(); + expect(screen.getByText('95%')).toBeInTheDocument(); +
}); + + it('hides score badge by default', () => { + render(); + expect(screen.queryByText('95%')).not.toBeInTheDocument(); + }); + + it('renders display columns as metadata', () => { + render( + + ); + expect(screen.getByText('category:')).toBeInTheDocument(); + expect(screen.getByText('books')).toBeInTheDocument(); + }); +}); + +describe('SearchResults', () => { /* covers the loading, empty, custom-empty, error, populated and no-query states; the two-hit fixture below feeds the populated case */ + const results = [ + { score: 0.95, data: { id: 1, title: 'Result 1' } }, + { score: 0.87, data: { id: 2, title: 'Result 2' } }, + ]; + + it('shows loading skeleton when loading with no results', () => { + render(); + expect(screen.getByTestId('loading-skeleton')).toBeInTheDocument(); + }); + + it('shows empty message when no results', () => { + render(); + expect(screen.getByText('No results found.')).toBeInTheDocument(); + }); + + it('shows custom empty message', () => { + render(); + expect(screen.getByText('Nothing here')).toBeInTheDocument(); + }); + + it('shows error banner', () => { + const error = { code: 'INTERNAL' as const, message: 'Server error', statusCode: 500 }; + render(); + expect(screen.getByText('Search failed')).toBeInTheDocument(); + expect(screen.getByText('Server error')).toBeInTheDocument(); + }); + + it('renders results with summary', () => { + render(); + expect(screen.getByText('2 results in 35ms')).toBeInTheDocument(); + expect(screen.getByText('Result 1')).toBeInTheDocument(); + expect(screen.getByText('Result 2')).toBeInTheDocument(); + }); + + it('returns null when no query', () => { + const { container } = render(); + expect(container.firstChild).toBeNull(); + }); +}); + +describe('SearchLoadMore', () => { + it('renders button when hasMore is true', () => { + render( {}} />); + expect(screen.getByText('Load more results')).toBeInTheDocument(); + }); + + it('renders nothing when hasMore is false', () => { + const { container } = render( {}} />); + expect(container.firstChild).toBeNull(); + }); + + it('shows Loading...
when isLoading', () => { + render( {}} />); + expect(screen.getByText('Loading...')).toBeInTheDocument(); + }); + + it('calls onLoadMore on click', () => { + const onLoadMore = vi.fn(); + render(); + fireEvent.click(screen.getByText('Load more results')); + expect(onLoadMore).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/vector-search/tests/ui/hooks/useVectorSearch.test.ts b/packages/vector-search/tests/ui/hooks/useVectorSearch.test.ts new file mode 100644 index 00000000..c663c75e --- /dev/null +++ b/packages/vector-search/tests/ui/hooks/useVectorSearch.test.ts @@ -0,0 +1,201 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { renderHook, act } from '@testing-library/react'; +import { useVectorSearch } from '../../../src/ui/hooks/useVectorSearch'; + +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +const mockResponse = { + results: [{ score: 0.95, data: { id: 1, title: 'Test Result' } }], + totalCount: 1, + queryTimeMs: 20, + queryType: 'hybrid', + fromCache: false, + nextPageToken: null, +}; + +/** Flush all pending microtasks (promise callbacks) */ +const flushPromises = () => act(() => Promise.resolve()); + +describe('useVectorSearch', () => { + beforeEach(() => { + mockFetch.mockReset(); + vi.useFakeTimers(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + it('debounces search calls (300ms default)', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); + const { result } = renderHook(() => useVectorSearch('products')); + + act(() => { result.current.search('a'); }); + act(() => { result.current.search('ab'); }); + act(() => { result.current.search('abc'); }); + + // Before debounce fires + expect(mockFetch).not.toHaveBeenCalled(); + + // After debounce — advance timers then flush promises for fetch resolution + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + +
expect(mockFetch).toHaveBeenCalledTimes(1); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.queryText).toBe('abc'); + }); + + it('does not search below minQueryLength', async () => { + const { result } = renderHook(() => + useVectorSearch('products', { minQueryLength: 3 }) + ); + + act(() => { result.current.search('ab'); }); + await act(async () => { vi.advanceTimersByTime(400); }); + + expect(mockFetch).not.toHaveBeenCalled(); + expect(result.current.results).toEqual([]); + }); + + it('sets isLoading true during search', async () => { /* json() is kept pending through resolveJson so the in-flight state can be observed before the body arrives */ + let resolveJson!: (v: unknown) => void; + mockFetch.mockReturnValue( + Promise.resolve({ + ok: true, + json: () => new Promise((r) => { resolveJson = r; }), + }) + ); + + const { result } = renderHook(() => useVectorSearch('products')); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + // fetch was called, but json() hasn't resolved yet + await flushPromises(); + + expect(result.current.isLoading).toBe(true); + + await act(async () => { resolveJson(mockResponse); }); + expect(result.current.isLoading).toBe(false); + expect(result.current.results).toHaveLength(1); + }); + + it('populates results after successful search', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); + const { result } = renderHook(() => useVectorSearch('products')); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + + expect(result.current.results).toHaveLength(1); + expect(result.current.results[0].score).toBe(0.95); + expect(result.current.results[0].data).toEqual({ id: 1, title: 'Test Result' }); + expect(result.current.totalCount).toBe(1); + expect(result.current.queryTimeMs).toBe(20); + expect(result.current.fromCache).toBe(false); + expect(result.current.query).toBe('test'); + }); + + it('sets error on failed search', async () => { +
mockFetch.mockResolvedValue({ + ok: false, + status: 404, + json: () => Promise.resolve({ code: 'INDEX_NOT_FOUND', message: 'Not found', statusCode: 404 }), + }); + + const { result } = renderHook(() => useVectorSearch('products')); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + + expect(result.current.error).not.toBeNull(); + expect(result.current.error!.code).toBe('INDEX_NOT_FOUND'); + expect(result.current.isLoading).toBe(false); + }); + + it('clears everything on clear()', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); + const { result } = renderHook(() => useVectorSearch('products')); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + + expect(result.current.results).toHaveLength(1); + + act(() => { result.current.clear(); }); + + expect(result.current.results).toEqual([]); + expect(result.current.query).toBe(''); + expect(result.current.totalCount).toBe(0); + expect(result.current.error).toBeNull(); + }); + + it('re-executes search when filters change', async () => { /* after the first debounced search, setFilters alone (no new search call, no timer advance) is expected to produce the second fetch */ + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); + const { result } = renderHook(() => useVectorSearch('products')); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + + expect(mockFetch).toHaveBeenCalledTimes(1); + + await act(async () => { result.current.setFilters({ category: 'books' }); }); + await flushPromises(); + + expect(mockFetch).toHaveBeenCalledTimes(2); + const body = JSON.parse(mockFetch.mock.calls[1][1].body); + expect(body.filters).toEqual({ category: 'books' }); + }); + + it('calls onResults callback on success', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); + const onResults
= vi.fn(); + const { result } = renderHook(() => + useVectorSearch('products', { onResults }) + ); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + + expect(onResults).toHaveBeenCalledTimes(1); + expect(onResults).toHaveBeenCalledWith(mockResponse); + }); + + it('calls onError callback on failure', async () => { /* only asserts that onError fired once; the argument it receives is not inspected */ + const errorResponse = { code: 'INTERNAL', message: 'Server error', statusCode: 500 }; + mockFetch.mockResolvedValue({ + ok: false, + status: 500, + json: () => Promise.resolve(errorResponse), + }); + const onError = vi.fn(); + const { result } = renderHook(() => + useVectorSearch('products', { onError }) + ); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + + expect(onError).toHaveBeenCalledTimes(1); + }); + + it('sends request to correct API endpoint', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); + const { result } = renderHook(() => useVectorSearch('products')); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + + expect(mockFetch).toHaveBeenCalled(); + expect(mockFetch.mock.calls[0][0]).toBe('/api/vector-search/products/query'); + }); +}); diff --git a/packages/vector-search/tsconfig.json b/packages/vector-search/tsconfig.json new file mode 100644 index 00000000..c2c6364a --- /dev/null +++ b/packages/vector-search/tsconfig.json @@ -0,0 +1,16 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "bundler", + "jsx": "react-jsx", + "declaration": true, + "outDir": "./dist", + "rootDir": "./src", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist", "**/*.test.ts", "**/*.test.tsx"] +} diff --git
a/packages/vector-search/vitest.config.ts b/packages/vector-search/vitest.config.ts new file mode 100644 index 00000000..6ccbfd4c --- /dev/null +++ b/packages/vector-search/vitest.config.ts @@ -0,0 +1,9 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + environment: 'jsdom', /* DOM implementation for the suites that call render and renderHook */ + globals: true, /* NOTE(review): the test files import describe, it and expect explicitly; presumably kept so libraries that look for global hooks such as afterEach can find them - confirm */ + setupFiles: ['./vitest.setup.ts'], /* runs before each test file; that setup file only imports the jest-dom matchers */ + }, +}); diff --git a/packages/vector-search/vitest.setup.ts b/packages/vector-search/vitest.setup.ts new file mode 100644 index 00000000..bb02c60c --- /dev/null +++ b/packages/vector-search/vitest.setup.ts @@ -0,0 +1 @@ +import '@testing-library/jest-dom/vitest'; /* side-effect import; the component tests rely on matchers such as toBeInTheDocument */