Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 65 additions & 14 deletions src/common/connectionManager.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { EventEmitter } from "events";
import type { MongoClientOptions } from "mongodb";
import type { MongoClientOptions, MongoServerError } from "mongodb";
import { ConnectionString } from "mongodb-connection-string-url";
import { NodeDriverServiceProvider } from "@mongosh/service-provider-node-driver";
import { generateConnectionInfoFromCliArgs, type ConnectionInfo } from "@mongosh/arg-parser";
Expand Down Expand Up @@ -33,6 +33,8 @@ export interface ConnectionState {
}

// Scratch database used while probing the connected deployment for search
// capabilities; the probe drops it again once it has finished.
const MCP_TEST_DATABASE = "#mongodb-mcp";
// Field path used in the throwaway vector search index created by the probe.
const MCP_TEST_VECTOR_SEARCH_INDEX_FIELD = "__mongodb-mcp-field";
// Name given to the throwaway vector search index created by the probe.
const MCP_TEST_VECTOR_SEARCH_INDEX_NAME = "mongodb-mcp-vector-search-index";

export const defaultDriverOptions: ConnectionInfo["driverOptions"] = {
readConcern: {
Expand All @@ -56,23 +58,72 @@ export class ConnectionStateConnected implements ConnectionState {
public connectedAtlasCluster?: AtlasClusterConnectionInfo
) {}

private _isSearchSupported?: boolean;
private connectionMetadata?: { searchSupported: boolean; autoEmbeddingIndexSupported: boolean };

public async isSearchSupported(): Promise<boolean> {
if (this._isSearchSupported === undefined) {
try {
// If a cluster supports search indexes, the call below will succeed
// with a cursor otherwise will throw an Error.
// the Search Index Management Service might not be ready yet, but
// we assume that the agent can retry in that situation.
await this.serviceProvider.getSearchIndexes(MCP_TEST_DATABASE, "test");
this._isSearchSupported = true;
} catch {
this._isSearchSupported = false;
/**
 * Probes the connected deployment to find out whether it supports search
 * indexes and auto-embedding vector search indexes, and caches the outcome
 * in `connectionMetadata`.
 *
 * The probe creates a throwaway collection in `MCP_TEST_DATABASE` and tries
 * to create an auto-embedding vector search index on it. The scratch
 * database is dropped afterwards regardless of the outcome; a failure of
 * that cleanup propagates to the caller.
 *
 * When the probe fails for a reason that is not recognised below,
 * `connectionMetadata` is left unset, so callers that check it for
 * `undefined` will run the probe again on their next call.
 */
private async populateConnectionMetadata(): Promise<void> {
    try {
        await this.serviceProvider.createCollection(MCP_TEST_DATABASE, "test");
        // If a cluster supports search indexes and auto-embedding, the call
        // below will succeed; otherwise it throws. The Search Index
        // Management Service might not be ready yet, but we assume that the
        // agent can retry in that situation.
        await this.serviceProvider.createSearchIndexes(MCP_TEST_DATABASE, "test", [
            {
                name: MCP_TEST_VECTOR_SEARCH_INDEX_NAME,
                type: "vectorSearch",
                definition: {
                    fields: [
                        {
                            // TODO: Before public preview this needs to be
                            // changed to either "autoEmbedText" or "autoEmbed"
                            // depending on which syntax is finalized.
                            type: "text",
                            path: MCP_TEST_VECTOR_SEARCH_INDEX_FIELD,
                            model: "voyage-3-large",
                        },
                    ],
                },
            },
        ]);
        this.connectionMetadata = {
            searchSupported: true,
            autoEmbeddingIndexSupported: true,
        };
    } catch (error: unknown) {
        // Read the fields defensively: anything can be thrown, and touching
        // `.message` on a non-object would raise a secondary TypeError from
        // inside this handler.
        const { codeName, message } = (typeof error === "object" && error !== null ? error : {}) as {
            codeName?: unknown;
            message?: unknown;
        };
        const errorMessage = typeof message === "string" ? message : "";

        if (codeName === "SearchNotEnabled") {
            this.connectionMetadata = {
                searchSupported: false,
                autoEmbeddingIndexSupported: false,
            };
        }
        // If the error is because we tried creating an index with an
        // auto-embed field definition, then we can safely assume that search
        // is supported but auto-embeddings are not.
        else if (errorMessage.includes('"userCommand.indexes[0].fields[0].type" must be one of')) {
            this.connectionMetadata = {
                searchSupported: true,
                autoEmbeddingIndexSupported: false,
            };
        }
    } finally {
        await this.serviceProvider.dropDatabase(MCP_TEST_DATABASE);
    }
}

/**
 * Reports whether the connected deployment supports search indexes.
 *
 * Runs the capability probe first when no result has been cached yet, and
 * answers `false` when the probe leaves the cache empty.
 */
public async isSearchSupported(): Promise<boolean> {
    const probeNeeded = this.connectionMetadata === undefined;
    if (probeNeeded) {
        await this.populateConnectionMetadata();
    }

    const supported = this.connectionMetadata?.searchSupported;
    return supported ?? false;
}

/**
 * Reports whether the connected deployment supports auto-embedding vector
 * search indexes.
 *
 * Runs the capability probe first when no result has been cached yet, and
 * answers `false` when the probe leaves the cache empty.
 */
public async isAutoEmbeddingIndexSupported(): Promise<boolean> {
    if (this.connectionMetadata === undefined) {
        await this.populateConnectionMetadata();
    }

    // Answer from the auto-embedding flag of the cached probe result, not
    // from the generic search-support flag.
    return this.connectionMetadata?.autoEmbeddingIndexSupported ?? false;
}
}

Expand Down
1 change: 1 addition & 0 deletions src/common/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export enum ErrorCodes {
AtlasVectorSearchIndexNotFound = 1_000_006,
AtlasVectorSearchInvalidQuery = 1_000_007,
Unexpected = 1_000_008,
AutoEmbeddingIndexNotSupported = 1_000_009,
}

export class MongoDBError<ErrorCode extends ErrorCodes = ErrorCodes> extends Error {
Expand Down
19 changes: 19 additions & 0 deletions src/common/session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,25 @@ export class Session extends EventEmitter<SessionEvents> {
}
}

/**
 * Reports whether the current connection supports auto-embedding indexes.
 * Always `false` while the session is not in the "connected" state.
 */
async isAutoEmbeddingIndexSupported(): Promise<boolean> {
    const connection = this.connectionManager.currentConnectionState;
    if (connection.tag !== "connected") {
        return false;
    }

    return await connection.isAutoEmbeddingIndexSupported();
}

/**
 * Ensures the current connection can create auto-embedding indexes.
 *
 * @throws MongoDBError with `ErrorCodes.AutoEmbeddingIndexNotSupported` when
 * the session is not connected or the deployment lacks auto-embedding
 * support.
 */
async assertAutoEmbeddingIndexSupported(): Promise<void> {
    // Check the auto-embedding capability specifically: a deployment can
    // support search indexes without supporting auto-embedding indexes.
    const isAutoEmbeddingIndexSupported = await this.isAutoEmbeddingIndexSupported();
    if (!isAutoEmbeddingIndexSupported) {
        throw new MongoDBError(
            ErrorCodes.AutoEmbeddingIndexNotSupported,
            "The connected search management service does not support creating auto-embedding indexes."
        );
    }
}

get serviceProvider(): NodeDriverServiceProvider {
if (this.isConnectedToMongoDB) {
const state = this.connectionManager.currentConnectionState as ConnectionStateConnected;
Expand Down
137 changes: 88 additions & 49 deletions src/tools/mongodb/create/createIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,62 +7,96 @@ import { quantizationEnum } from "../../../common/search/vectorSearchEmbeddingsM
import { similarityValues } from "../../../common/schemas.js";

export class CreateIndexTool extends MongoDBToolBase {
/**
 * Schema for a `filter` entry of a vector search index definition: a field
 * that is indexed only so it can be used for pre-filtering results.
 * `.strict()` makes the schema reject keys other than `type` and `path`.
 */
private filterFieldSchema = z
.object({
type: z.literal("filter"),
path: z
.string()
.describe(
"Name of the field to index. For nested fields, use dot notation to specify path to embedded fields"
),
})
.strict()
.describe("Definition for a field that will be used for pre-filtering results.");

/**
 * Schema for a `vector` entry of a vector search index definition: a field
 * that already contains vector embeddings.
 *
 * `numDimensions` (1 to 8192) and `similarity` fall back to the configured
 * `vectorSearchDimensions` and `vectorSearchSimilarityFunction` values, and
 * `quantization` falls back to "none". `.strict()` makes the schema reject
 * keys that are not declared here.
 */
private vectorFieldSchema = z
.object({
type: z.literal("vector"),
path: z
.string()
.describe(
"Name of the field to index. For nested fields, use dot notation to specify path to embedded fields"
),
numDimensions: z
.number()
.min(1)
.max(8192)
.default(this.config.vectorSearchDimensions)
.describe(
"Number of vector dimensions that MongoDB Vector Search enforces at index-time and query-time"
),
similarity: z
.enum(similarityValues)
.default(this.config.vectorSearchSimilarityFunction)
.describe(
"Vector similarity function to use to search for top K-nearest neighbors. You can set this field only for vector-type fields."
),
quantization: quantizationEnum
.default("none")
.describe(
"Type of automatic vector quantization for your vectors. Use this setting only if your embeddings are float or double vectors."
),
})
.strict()
.describe("Definition for a field that contains vector embeddings.");

/**
 * Schema for an auto-embedding entry of a vector search index definition: a
 * field whose embeddings are generated automatically with the selected
 * Voyage model. The entry is currently tagged with `type: "text"`.
 * `.strict()` makes the schema reject keys other than `type`, `path` and
 * `model`.
 */
private autoEmbedFieldSchema = z
.object({
// TODO: Before public preview this needs to be changed to either
// "autoEmbedText" or "autoEmbed" depending on which syntax is
// finalized.
type: z.literal("text"),
path: z
.string()
.describe(
"Name of the field to index. For nested fields, use dot notation to specify path to embedded fields"
),
model: z
.enum(["voyage-3-large", "voyage-3.5", "voyage-3.5-lite"])
.describe("Voyage embedding model to be used for generating auto-embeddings"),
// TODO: As of public preview, `modality` is not required, and even if
// it were, the only supported modality is "text". However, we should
// keep an eye on whether recent changes alter the requirements
// around this field.
// modality: z.enum(["text"]).describe("The type of data in the field mentioned in `path` property."),

// We don't support specifying `hnswOptions` even in the current vector
// search implementation. Following the same idea, we won't support
// `hnswOptions` for the `autoEmbedText` field definition either.
// hnswOptions: z.object({})
})
.strict()
.describe("Definition for a field for which embeddings must be auto-generated.");

/**
 * Schema for a Vector Search index definition.
 *
 * `fields` is a non-empty list of field definitions discriminated on `type`,
 * built from the shared `filterFieldSchema`, `vectorFieldSchema` and
 * `autoEmbedFieldSchema`. Each discriminator value appears exactly once in
 * the union. At least one entry must be of type 'vector' or 'text'; a
 * definition made only of 'filter' fields is rejected.
 */
private vectorSearchIndexDefinition = z
    .object({
        type: z.literal("vectorSearch"),
        fields: z
            .array(
                z.discriminatedUnion("type", [
                    this.filterFieldSchema,
                    this.vectorFieldSchema,
                    this.autoEmbedFieldSchema,
                ])
            )
            .nonempty()
            // An index needs something to search on: either pre-computed
            // vectors ('vector') or auto-embedded text ('text').
            .refine((fields) => fields.some((f) => f.type === "vector" || f.type === "text"), {
                message: "At least one field of type 'vector' or 'text' must be defined",
            }).describe(`\
Definitions for the vector, auto-embed and filter fields to index, one definition per document. \
You must specify 'vector' for fields that contain vector embeddings, 'text' for fields for which embeddings must be auto-generated and, \
'filter' for additional fields to filter on. At least one 'vector' or 'text' type field definition is required.\
`),
    })
    .describe("Definition for a Vector Search index.");

Expand Down Expand Up @@ -183,7 +217,12 @@ export class CreateIndexTool extends MongoDBToolBase {
break;
case "vectorSearch":
{
await this.ensureSearchIsSupported();
const creatingAutoEmbedIndex = definition.fields.some((field) => field.type === "text");
if (creatingAutoEmbedIndex) {
await this.session.assertAutoEmbeddingIndexSupported();
} else {
await this.session.assertSearchSupported();
}
indexes = await provider.createSearchIndexes(database, collection, [
{
name,
Expand All @@ -204,7 +243,7 @@ export class CreateIndexTool extends MongoDBToolBase {
break;
case "search":
{
await this.ensureSearchIsSupported();
await this.session.assertSearchSupported();
indexes = await provider.createSearchIndexes(database, collection, [
{
name,
Expand Down
10 changes: 6 additions & 4 deletions src/tools/mongodb/create/insertMany.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ const zSupportedEmbeddingParametersWithInput = zSupportedEmbeddingParameters.ext
input: z
.array(z.object({}).passthrough())
.describe(
"Array of objects with vector search index fields as keys (in dot notation) and the raw text values to generate embeddings for as values. The index of each object corresponds to the index of the document in the documents array."
"Array of objects with indexed field paths as keys (in dot notation) and the raw text values to generate embeddings for as values. Only provide fields that require embeddings to be generated; do not include fields that are covered by auto-embedding indexes. The index of each object corresponds to the index of the document in the documents array."
),
});

Expand All @@ -34,7 +34,7 @@ export class InsertManyTool extends MongoDBToolBase {
embeddingParameters: zSupportedEmbeddingParametersWithInput
.optional()
.describe(
"The embedding model and its parameters to use to generate embeddings for fields with vector search indexes. Note to LLM: If unsure which embedding model to use, ask the user before providing one."
"The embedding model and its parameters to use to generate embeddings. Only provide this for fields where you need to generate embeddings; do not include fields that have auto-embedding indexes configured, as MongoDB will automatically generate embeddings for those. Note to LLM: If unsure which embedding model to use, ask the user before providing one."
),
}
: commonArgs;
Expand Down Expand Up @@ -93,7 +93,9 @@ export class InsertManyTool extends MongoDBToolBase {
return documents;
}

// Get vector search indexes for the collection
// Get vector search indexes for the collection.
// Note: embeddingsForNamespace() only returns fields that require manual embedding generation,
// excluding fields with auto-embedding indexes where MongoDB generates embeddings automatically.
const vectorIndexes = await this.session.vectorSearchEmbeddingsManager.embeddingsForNamespace({
database,
collection,
Expand All @@ -105,7 +107,7 @@ export class InsertManyTool extends MongoDBToolBase {
if (!vectorIndexes.some((index) => index.path === fieldPath)) {
throw new MongoDBError(
ErrorCodes.AtlasVectorSearchInvalidQuery,
`Field '${fieldPath}' does not have a vector search index in collection ${database}.${collection}. Only fields with vector search indexes can have embeddings generated.`
`Field '${fieldPath}' does not have a vector search index configured for manual embedding generation in collection ${database}.${collection}. This field either has no index or has an auto-embedding index where MongoDB generates embeddings automatically.`
);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/tools/mongodb/delete/dropIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ export class DropIndexTool extends MongoDBToolBase {
provider: NodeDriverServiceProvider,
{ database, collection, indexName }: ToolArgs<typeof this.argsShape>
): Promise<CallToolResult> {
await this.ensureSearchIsSupported();
await this.session.assertSearchSupported();
const indexes = await provider.getSearchIndexes(database, collection, indexName);
if (indexes.length === 0) {
return {
Expand Down
Loading
Loading