-
Notifications
You must be signed in to change notification settings - Fork 4.6k
Expand file tree
/
Copy pathCosmosMongoCollectionCreateMapping.cs
More file actions
129 lines (111 loc) · 5.31 KB
/
CosmosMongoCollectionCreateMapping.cs
File metadata and controls
129 lines (111 loc) · 5.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
// Copyright (c) Microsoft. All rights reserved.
using System;
using System.Collections.Generic;
using Microsoft.Extensions.VectorData;
using Microsoft.Extensions.VectorData.ProviderServices;
using MongoDB.Bson;
namespace Microsoft.SemanticKernel.Connectors.CosmosMongoDB;
/// <summary>
/// Helpers that build the index definitions used when creating a collection in Azure CosmosDB MongoDB.
/// </summary>
internal static class CosmosMongoCollectionCreateMapping
{
    /// <summary>
    /// Builds one index definition per vector property that does not already have an index.
    /// </summary>
    /// <param name="vectorProperties">Vector properties to create indexes for.</param>
    /// <param name="uniqueIndexes">Names of indexes that already exist; properties whose index name is in this set are skipped.</param>
    /// <param name="numLists">Number of clusters that the inverted file (IVF) index uses to group the vector data.</param>
    /// <param name="efConstruction">The size of the dynamic candidate list for constructing the graph.</param>
    /// <returns>A <see cref="BsonArray"/> holding an index document for every vector property that was not skipped.</returns>
    /// <exception cref="NotSupportedException">A vector property specifies an index kind or distance function that has no mapping here.</exception>
    public static BsonArray GetVectorIndexes(
        IReadOnlyList<IVectorPropertyModel> vectorProperties,
        HashSet<string?> uniqueIndexes,
        int numLists,
        int efConstruction)
    {
        var definitions = new BsonArray();

        // Each vector property gets its own, separately named index.
        foreach (var vectorProperty in vectorProperties)
        {
            var fieldName = vectorProperty.StorageName;
            var name = BuildIndexName(fieldName);

            // Nothing to do when an index with this name is already present.
            if (uniqueIndexes.Contains(name))
            {
                continue;
            }

            // Values are added in the same order as the fields appear in the resulting document.
            var options = new BsonDocument()
                .Add("kind", GetIndexKind(vectorProperty.IndexKind, fieldName))
                .Add("numLists", numLists)
                .Add("similarity", GetDistanceFunction(vectorProperty.DistanceFunction, fieldName))
                .Add("dimensions", vectorProperty.Dimensions)
                .Add("efConstruction", efConstruction);

            var keyDocument = new BsonDocument();
            keyDocument[fieldName] = "cosmosSearch";

            var definition = new BsonDocument();
            definition["name"] = name;
            definition["key"] = keyDocument;
            definition["cosmosSearchOptions"] = options;

            definitions.Add(definition);
        }

        return definitions;
    }

    /// <summary>
    /// Builds one ascending index definition per indexed data property that does not already have an index.
    /// </summary>
    /// <param name="dataProperties">Data properties to inspect; only those flagged as indexed are considered.</param>
    /// <param name="uniqueIndexes">Names of indexes that already exist; properties whose index name is in this set are skipped.</param>
    /// <returns>A <see cref="BsonArray"/> holding an index document for every indexed data property that was not skipped.</returns>
    public static BsonArray GetFilterableDataIndexes(
        IReadOnlyList<IDataPropertyModel> dataProperties,
        HashSet<string?> uniqueIndexes)
    {
        var definitions = new BsonArray();

        foreach (var dataProperty in dataProperties)
        {
            // Properties that are not marked as indexed never get an index.
            if (!dataProperty.IsIndexed)
            {
                continue;
            }

            var name = BuildIndexName(dataProperty.StorageName);

            // Nothing to do when an index with this name is already present.
            if (uniqueIndexes.Contains(name))
            {
                continue;
            }

            var keyDocument = new BsonDocument();
            keyDocument[dataProperty.StorageName] = 1;

            var definition = new BsonDocument();
            definition["name"] = name;
            definition["key"] = keyDocument;

            definitions.Add(definition);
        }

        return definitions;
    }

    /// <summary>
    /// Derives the index name for a property: the storage name followed by an underscore.
    /// </summary>
    private static string BuildIndexName(string storageName)
        => $"{storageName}_";

    /// <summary>
    /// Translates the index kind of a vector property into the corresponding Azure CosmosDB for MongoDB index kind string.
    /// More information about Azure CosmosDB for MongoDB index kinds here: <see href="https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/vector-search" />.
    /// </summary>
    /// <param name="indexKind">Index kind configured on the vector property; may be null.</param>
    /// <param name="vectorPropertyName">Name of the vector property, used only in the error message.</param>
    /// <exception cref="NotSupportedException">The resolved index kind is neither HNSW nor IVF Flat.</exception>
    private static string GetIndexKind(string? indexKind, string vectorPropertyName)
    {
        switch (CosmosMongoCollectionSearchMapping.GetVectorPropertyIndexKind(indexKind))
        {
            case IndexKind.Hnsw:
                return "vector-hnsw";

            case IndexKind.IvfFlat:
                return "vector-ivf";

            default:
                throw new NotSupportedException($"Index kind '{indexKind}' on {nameof(VectorStoreVectorProperty)} '{vectorPropertyName}' is not supported by the Azure CosmosDB for MongoDB VectorStore.");
        }
    }

    /// <summary>
    /// Translates the distance function of a vector property into the corresponding Azure CosmosDB for MongoDB similarity string.
    /// More information about Azure CosmosDB for MongoDB distance functions here: <see href="https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/vector-search" />.
    /// </summary>
    /// <param name="distanceFunction">Distance function configured on the vector property; may be null.</param>
    /// <param name="vectorPropertyName">Name of the vector property, used only in the error message.</param>
    /// <exception cref="NotSupportedException">The resolved distance function is not cosine distance, dot product similarity or Euclidean distance.</exception>
    private static string GetDistanceFunction(string? distanceFunction, string vectorPropertyName)
    {
        switch (CosmosMongoCollectionSearchMapping.GetVectorPropertyDistanceFunction(distanceFunction))
        {
            case DistanceFunction.CosineDistance:
                return "COS";

            case DistanceFunction.DotProductSimilarity:
                return "IP";

            case DistanceFunction.EuclideanDistance:
                return "L2";

            default:
                throw new NotSupportedException($"Distance function '{distanceFunction}' for {nameof(VectorStoreVectorProperty)} '{vectorPropertyName}' is not supported by the Azure CosmosDB for MongoDB VectorStore.");
        }
    }
}