Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
98322ae
feat: add Article 2 select-algorithm samples for all 5 languages
diberry Apr 29, 2026
5114591
fix: review findings - auth scope, consistency, env vars
diberry Apr 29, 2026
7185bb9
refactor(.NET): replace DotNetEnv with appsettings.json + Configurati…
diberry Apr 29, 2026
f9d5f10
docs: add azd env get-values config section to Article 2 READMEs
diberry Apr 29, 2026
2cde68a
feat: add compare-all runner for all 5 languages
diberry Apr 29, 2026
48b9f12
merge: pull compare-all changes from origin
diberry Apr 29, 2026
4d421ad
refactor: make compare-all self-contained with create/cleanup
diberry Apr 29, 2026
edcfe2a
Standardize collection lifecycle: conditional drop at start, always d…
diberry Apr 30, 2026
e17a32d
feat(java): Add individual algorithm runner files (IVF, HNSW, DiskANN)
diberry May 5, 2026
3fe2ce1
Add TypeScript individual runners and fix compare-all
diberry May 5, 2026
5918b62
feat(python): Add individual algorithm runners and fix utils
diberry May 5, 2026
85a6bf9
feat(go): Implement complete vector search algorithm comparison sample
diberry May 5, 2026
a9408ee
refactor: Move vector-search updates to separate PR #79
diberry May 6, 2026
a2d1762
fix: resolve merge conflicts and build errors in select-algorithm-typ…
diberry May 6, 2026
4900b92
fix: resolve merge conflicts and add missing getConfig in select-algo…
diberry May 6, 2026
be79978
fix: use create/search/drop pattern for compare-all across all languages
diberry May 6, 2026
ff8b0a3
feat: show top 2 results with score diff instead of latency
diberry May 6, 2026
4927a9c
feat: standardize output format with key insights across all languages
diberry May 6, 2026
7302e72
Update select-algorithm samples to use local data/ folder
diberry May 6, 2026
609cd61
fix: run all samples, fix Python search API, add real output
diberry May 6, 2026
84c6ffa
fix(java): fix OIDC auth and compilation errors, re-capture all outpu…
diberry May 6, 2026
90509f8
fix: address review findings - standardize EMBEDDED_FIELD, fix Go err…
diberry May 6, 2026
9ec1a56
fix: OIDC auth + search retry for all 5 compare-all samples
diberry May 7, 2026
06e7210
fix: Python pymongo version and .NET CompareAll signature mismatch
diberry May 7, 2026
088d3ab
fix: treat empty SIMILARITY env var as run-all instead of crashing
diberry May 8, 2026
4371108
fix: select-algorithm samples — 12 issues from cross-language comparison
diberry May 8, 2026
e79e987
fix: Update vector-search samples for consistent collection lifecycle
diberry May 6, 2026
59d1703
Revert "fix: Update vector-search samples for consistent collection l…
diberry May 8, 2026
229b3ad
chore: move copilot-instructions to PR #80
diberry May 8, 2026
5c69d16
fix: align select-algorithm samples with instruction rules
diberry May 8, 2026
4b5ab0c
fix: use local data/ path for Hotels_Vector.json in all select-algori…
diberry May 8, 2026
329ce80
feat: update output format to show top 2 results with diff
diberry May 8, 2026
fd03188
fix: standardize retryWrites=false, fix TS openai import and token ex…
diberry May 8, 2026
9497fcc
fix: add retryWrites=false to Java MongoClientSettings
diberry May 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions ai/select-algorithm-dotnet/.devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"name": "Azure DocumentDB Select Algorithm - .NET 8",
"image": "mcr.microsoft.com/devcontainers/dotnet:1-8.0-bookworm",

"features": {
"ghcr.io/devcontainers/features/azure-cli:1": {},
"ghcr.io/devcontainers/features/github-cli:1": {},
"ghcr.io/devcontainers/features/common-utils:2": {
"installZsh": true,
"configureZshAsDefaultShell": true,
"installOhMyZsh": true
}
},

"customizations": {
"vscode": {
"extensions": [
"ms-dotnettools.csdevkit",
"ms-dotnettools.vscodeintellicode-csharp",
"ms-azuretools.vscode-azureresourcegroups",
"ms-azuretools.vscode-cosmosdb",
"mongodb.mongodb-vscode"
],
"settings": {
"dotnet.completion.showCompletionItemsFromUnimportedNamespaces": true,
"files.exclude": {
"**/bin": true,
"**/obj": true
}
}
}
},

"postCreateCommand": "dotnet restore && dotnet build",
"remoteUser": "vscode",

"containerEnv": {
"DOTNET_CLI_TELEMETRY_OPTOUT": "1",
"DOTNET_NOLOGO": "1"
},

"mounts": [
"source=${localEnv:HOME}${localEnv:USERPROFILE}/.azure,target=/home/vscode/.azure,type=bind,consistency=cached"
],

"capAdd": ["SYS_PTRACE"],
"securityOpt": ["seccomp:unconfined"]
}
7 changes: 7 additions & 0 deletions ai/select-algorithm-dotnet/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
bin/
obj/
.env

# Local data copy (user copies from ai/data/)
data/*.json
!data/README.md
259 changes: 259 additions & 0 deletions ai/select-algorithm-dotnet/CompareAll.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
// Unified comparison runner for all 9 combinations (3 algorithms × 3 similarity metrics).
// Runs the vector searches one after another against a single collection and prints a
// formatted comparison table of the top two results (and their score difference) per combination.

namespace SelectAlgorithm;

using MongoDB.Driver;
using MongoDB.Bson;
using OpenAI.Embeddings;
using SelectAlgorithm.Models;

/// <summary>
/// Runs every vector-index algorithm (IVF, HNSW, DiskANN) against every similarity metric
/// (COS, L2, IP) on a single "hotels" collection and prints the top two hits of each
/// combination side by side.
/// </summary>
public static class CompareAll
{
    /// <summary>Name of the scratch collection that is created, searched and dropped by <see cref="Run"/>.</summary>
    private const string CollectionName = "hotels";

    /// <summary>Fixed pause, in milliseconds, between creating an index and searching with it.</summary>
    private const int IndexBuildWaitMs = 5000;

    /// <summary>Row separator of the comparison table (also printed under the header).</summary>
    private const string TableSeparator = "├──────────┼────────┼────────────────────────────┼────────┼────────────────────────────┼────────┼───────┤";

    /// <summary>One vector index to create: its name, kind, similarity metric, dimensions and kind-specific options.</summary>
    private record IndexConfig(string Name, string Kind, string Similarity, int Dimensions, BsonDocument ExtraParams);

    /// <summary>One row of the comparison table: the two best hits for an algorithm/metric pair.</summary>
    private record SearchResult(string Algorithm, string Metric, string Top1Name, double Top1Score, string Top2Name, double Top2Score);

    /// <summary>Maps an index kind to its short display name; unknown kinds are returned unchanged.</summary>
    private static string GetAlgoDisplay(string kind) => kind switch
    {
        "vector-ivf" => "IVF",
        "vector-hnsw" => "HNSW",
        "vector-diskann" => "DiskANN",
        _ => kind
    };

    /// <summary>
    /// Loads the hotel documents once, embeds the query once, then for each of the nine
    /// index configurations drops the existing vector index, creates the new one, waits,
    /// searches, and finally prints a comparison table. The collection is always dropped on exit.
    /// </summary>
    /// <param name="appConfig">
    /// Application settings. The environment variables QUERY_TEXT, TOP_K and EMBEDDING_DIMENSIONS
    /// override the corresponding configured values when they are set to a usable value.
    /// </param>
    public static void Run(AppConfiguration appConfig)
    {
        Console.WriteLine(new string('=', 60));
        Console.WriteLine(" Compare All Algorithms × Metrics");
        Console.WriteLine(" 9 combinations: IVF, HNSW, DiskANN × COS, L2, IP");
        Console.WriteLine(new string('=', 60));

        // Configuration values, with environment-variable overrides for compare-specific settings.
        // Empty or invalid overrides fall back to the configured value instead of crashing.
        var databaseName = appConfig.MongoDB.DatabaseName;
        var dataFile = appConfig.DataFiles.WithVectors;
        var vectorField = appConfig.Embedding.EmbeddedField;
        var batchSize = appConfig.MongoDB.LoadBatchSize;
        var dimensions = ReadPositiveIntSetting("EMBEDDING_DIMENSIONS", appConfig.Embedding.Dimensions);
        var queryText = ReadStringSetting("QUERY_TEXT", appConfig.VectorSearch.Query);
        var topK = ReadPositiveIntSetting("TOP_K", appConfig.VectorSearch.TopK);

        var mongoClient = Utils.GetMongoClientPasswordless(appConfig);
        var embeddingClient = Utils.GetEmbeddingClient(appConfig);

        try
        {
            var database = mongoClient.GetDatabase(databaseName);

            // Drop collection for a clean comparison
            database.DropCollection(CollectionName);
            Console.WriteLine($"Dropped existing '{CollectionName}' collection (if any)");

            var collection = database.GetCollection<BsonDocument>(CollectionName);

            // Load data once into single collection; documents without the vector field are skipped
            var data = Utils.ReadJsonFile(dataFile);
            var documents = data.Where(d => d.Contains(vectorField)).ToList();
            Console.WriteLine($"\nLoaded {documents.Count} documents with embeddings");
            Utils.InsertData(collection, documents, batchSize);

            // Generate ONE embedding for the query (reused for all 9 searches)
            Console.WriteLine($"\nQuery: \"{queryText}\"");
            Console.WriteLine($"Top K: {topK}");
            var embeddingResult = embeddingClient.GenerateEmbedding(queryText);
            var queryVector = embeddingResult.Value.ToFloats().ToArray();
            Console.WriteLine("Embedding generated (reused for all searches)\n");

            // Define 9 index configurations
            var configs = BuildIndexConfigs(dimensions);

            // Run each config sequentially: drop→create→wait→search
            // DocumentDB doesn't allow multiple vector indexes of the same kind on the same field
            Console.WriteLine("Running 9 algorithm × metric combinations...\n");
            var results = new List<SearchResult>(configs.Count);
            foreach (var config in configs)
            {
                DropVectorIndexes(collection, vectorField);

                CreateIndex(collection, vectorField, config);
                Console.WriteLine($" ✓ {config.Name} created");

                // Give the service a fixed amount of time to build the index before querying it
                Thread.Sleep(IndexBuildWaitMs);

                var hits = RunVectorSearch(collection, queryVector, vectorField, config.Name, topK);

                var (top1Name, top1Score) = ReadHit(hits, 0);
                var (top2Name, top2Score) = ReadHit(hits, 1);
                results.Add(new SearchResult(GetAlgoDisplay(config.Kind), config.Similarity, top1Name, top1Score, top2Name, top2Score));
            }

            PrintComparisonTable(results);
        }
        finally
        {
            // Cleanup: drop the comparison collection; a cleanup failure must not mask the original error
            try
            {
                var database = mongoClient.GetDatabase(databaseName);
                database.DropCollection(CollectionName);
                Console.WriteLine($"\nCleanup: dropped collection '{CollectionName}'");
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Cleanup warning: {ex.Message}");
            }
            mongoClient.Cluster.Dispose();
        }
    }

    /// <summary>
    /// Returns the value of an environment variable, or <paramref name="fallback"/> when the
    /// variable is unset, empty or whitespace.
    /// </summary>
    private static string ReadStringSetting(string envVarName, string fallback)
    {
        var raw = Environment.GetEnvironmentVariable(envVarName);
        return string.IsNullOrWhiteSpace(raw) ? fallback : raw;
    }

    /// <summary>
    /// Returns an environment variable parsed as a positive integer, or <paramref name="fallback"/>
    /// when the variable is unset or empty. An unparsable or non-positive value is reported and ignored.
    /// </summary>
    private static int ReadPositiveIntSetting(string envVarName, int fallback)
    {
        var raw = Environment.GetEnvironmentVariable(envVarName);
        if (string.IsNullOrWhiteSpace(raw))
        {
            return fallback;
        }

        var parsedOk = int.TryParse(
            raw,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out var parsed);
        if (parsedOk && parsed > 0)
        {
            return parsed;
        }

        Console.WriteLine($"Warning: ignoring invalid {envVarName} value '{raw}'; using {fallback}");
        return fallback;
    }

    /// <summary>
    /// Reads the hotel name and score of the hit at <paramref name="index"/>.
    /// Returns ("-", 0.0) when there is no such hit; a missing or non-string name becomes
    /// "Unknown" and a missing or non-numeric score becomes 0.0.
    /// </summary>
    private static (string Name, double Score) ReadHit(IReadOnlyList<BsonDocument> hits, int index)
    {
        if (index >= hits.Count)
        {
            return ("-", 0.0);
        }

        var doc = hits[index];
        var name = doc.TryGetValue("HotelName", out var nameValue) && nameValue.IsString
            ? nameValue.AsString
            : "Unknown";
        var score = doc.TryGetValue("score", out var scoreValue) && scoreValue.IsNumeric
            ? scoreValue.ToDouble()
            : 0.0;
        return (name, score);
    }

    /// <summary>
    /// Builds the nine index configurations in the order IVF, HNSW, DiskANN, each with
    /// the metrics COS, L2 and IP.
    /// </summary>
    /// <param name="dimensions">Vector dimensions written into every index definition.</param>
    private static List<IndexConfig> BuildIndexConfigs(int dimensions)
    {
        string[] metrics = ["COS", "L2", "IP"];
        var configs = new List<IndexConfig>(9);

        // A fresh options document is built per index so no BsonDocument instance is shared.
        void AddAll(string kind, string namePrefix, Func<BsonDocument> buildParams)
        {
            foreach (var metric in metrics)
            {
                // Invariant lowering keeps index names stable regardless of the process culture
                var name = $"{namePrefix}_{metric.ToLowerInvariant()}";
                configs.Add(new IndexConfig(name, kind, metric, dimensions, buildParams()));
            }
        }

        AddAll("vector-ivf", "vector_ivf", () => new BsonDocument { { "numLists", 1 } });
        AddAll("vector-hnsw", "vector_hnsw", () => new BsonDocument { { "m", 16 }, { "efConstruction", 64 } });
        AddAll("vector-diskann", "vector_diskann", () => new BsonDocument { { "maxDegree", 20 }, { "lBuild", 10 } });

        return configs;
    }

    /// <summary>
    /// Best-effort removal of every index whose key maps <paramref name="vectorField"/> to
    /// "cosmosSearch". Failures are reported as warnings and never thrown, and one failing
    /// index does not stop the remaining ones from being dropped.
    /// </summary>
    private static void DropVectorIndexes(IMongoCollection<BsonDocument> collection, string vectorField)
    {
        List<BsonDocument> indexes;
        try
        {
            using var cursor = collection.Indexes.List();
            indexes = cursor.ToList();
        }
        catch (Exception ex)
        {
            Console.WriteLine($"Warning: could not list indexes: {ex.Message}");
            return;
        }

        foreach (var idx in indexes)
        {
            var nameValue = idx.GetValue("name", "");
            var keyValue = idx.GetValue("key", new BsonDocument());
            if (!nameValue.IsString || !keyValue.IsBsonDocument)
            {
                continue;
            }

            // Regular indexes store a number here; only a string "cosmosSearch" marks a vector index
            var isVectorIndex = keyValue.AsBsonDocument.TryGetValue(vectorField, out var keyKind)
                && keyKind.IsString
                && keyKind.AsString == "cosmosSearch";
            if (!isVectorIndex)
            {
                continue;
            }

            try
            {
                collection.Indexes.DropOne(nameValue.AsString);
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Warning: could not drop index '{nameValue.AsString}': {ex.Message}");
            }
        }
    }

    /// <summary>
    /// Creates the vector index described by <paramref name="config"/> on
    /// <paramref name="vectorField"/> via a raw createIndexes command. An index with the same
    /// name is dropped first, and an "already exists" error from the server is ignored.
    /// </summary>
    private static void CreateIndex(IMongoCollection<BsonDocument> collection, string vectorField, IndexConfig config)
    {
        // Drop existing index with same name if present
        try
        {
            collection.Indexes.DropOne(config.Name);
        }
        catch (MongoCommandException)
        {
            // Index doesn't exist, that's fine
        }

        var cosmosSearchOptions = new BsonDocument
        {
            { "kind", config.Kind },
            { "dimensions", config.Dimensions },
            { "similarity", config.Similarity }
        };

        // Append the kind-specific tuning parameters (numLists, m, efConstruction, ...)
        foreach (var param in config.ExtraParams)
        {
            cosmosSearchOptions.Add(param);
        }

        var indexDefinition = new BsonDocument
        {
            { "name", config.Name },
            { "key", new BsonDocument(vectorField, "cosmosSearch") },
            { "cosmosSearchOptions", cosmosSearchOptions }
        };

        var command = new BsonDocument
        {
            { "createIndexes", collection.CollectionNamespace.CollectionName },
            { "indexes", new BsonArray { indexDefinition } }
        };

        try
        {
            collection.Database.RunCommand<BsonDocument>(command);
        }
        catch (MongoCommandException ex) when (ex.Message.Contains("already exists", StringComparison.Ordinal))
        {
            // Index already exists with same config — idempotent
        }
    }

    /// <summary>
    /// Runs a cosmosSearch vector query for <paramref name="queryVector"/> on
    /// <paramref name="vectorField"/> and returns up to <paramref name="topK"/> documents
    /// projected to HotelName and the search score.
    /// </summary>
    /// <param name="indexName">
    /// Not sent to the server: the pipeline names only the field, and the caller keeps a single
    /// vector index on that field at a time.
    /// </param>
    private static List<BsonDocument> RunVectorSearch(
        IMongoCollection<BsonDocument> collection,
        float[] queryVector,
        string vectorField,
        string indexName,
        int topK)
    {
        var cosmosSearch = new BsonDocument
        {
            { "vector", new BsonArray(queryVector.Select(f => (double)f)) },
            { "path", vectorField },
            { "k", topK }
        };

        var projection = new BsonDocument
        {
            { "HotelName", 1 },
            { "score", new BsonDocument("$meta", "searchScore") }
        };

        var pipeline = new[]
        {
            new BsonDocument("$search", new BsonDocument("cosmosSearch", cosmosSearch)),
            new BsonDocument("$project", projection)
        };

        return collection.Aggregate<BsonDocument>(pipeline).ToList();
    }

    /// <summary>
    /// Prints one boxed table row per result: algorithm, metric, the two best hotel names with
    /// their scores, and the absolute difference between those scores.
    /// </summary>
    private static void PrintComparisonTable(List<SearchResult> results)
    {
        // Names longer than the 27-character column are cut to 24 characters plus "..."
        static string Fit(string name) => name.Length > 27 ? name[..24] + "..." : name;

        Console.WriteLine();
        Console.WriteLine("┌──────────┬────────┬────────────────────────────┬────────┬────────────────────────────┬────────┬───────┐");
        Console.WriteLine($"│ {"Algorithm",-9}│ {"Metric",-7}│ {"Top 1 Result",-27}│ {"Score",-7}│ {"Top 2 Result",-27}│ {"Score",-7}│ {"Diff",-6}│");
        Console.WriteLine(TableSeparator);

        for (var i = 0; i < results.Count; i++)
        {
            var r = results[i];
            var diff = Math.Abs(r.Top1Score - r.Top2Score);
            var top1Display = Fit(r.Top1Name);
            var top2Display = Fit(r.Top2Name);
            Console.WriteLine($"│ {r.Algorithm,-9}│ {r.Metric,-7}│ {top1Display,-27}│ {r.Top1Score,-7:F4}│ {top2Display,-27}│ {r.Top2Score,-7:F4}│ {diff,-6:F4}│");

            // No separator after the last row; the bottom border follows instead
            if (i < results.Count - 1)
            {
                Console.WriteLine(TableSeparator);
            }
        }

        Console.WriteLine("└──────────┴────────┴────────────────────────────┴────────┴────────────────────────────┴────────┴───────┘");
    }
}
41 changes: 41 additions & 0 deletions ai/select-algorithm-dotnet/Models/Configuration.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
namespace SelectAlgorithm.Models;

/// <summary>
/// Root settings object; each property holds one section of options and starts out
/// as a default-constructed instance, so no section is ever null.
/// </summary>
public class AppConfiguration
{
    /// <summary>Azure OpenAI section.</summary>
    public AzureOpenAIConfiguration AzureOpenAI { get; set; } = new AzureOpenAIConfiguration();

    /// <summary>MongoDB section.</summary>
    public MongoDBConfiguration MongoDB { get; set; } = new MongoDBConfiguration();

    /// <summary>Embedding section.</summary>
    public EmbeddingConfiguration Embedding { get; set; } = new EmbeddingConfiguration();

    /// <summary>Vector search section.</summary>
    public VectorSearchConfiguration VectorSearch { get; set; } = new VectorSearchConfiguration();

    /// <summary>Data files section.</summary>
    public DataFilesConfiguration DataFiles { get; set; } = new DataFilesConfiguration();
}

/// <summary>
/// Azure OpenAI options: the service endpoint and the embedding model name.
/// </summary>
public class AzureOpenAIConfiguration
{
    /// <summary>Endpoint value; empty until configured.</summary>
    public string Endpoint { get; set; } = "";

    /// <summary>Embedding model name; defaults to "text-embedding-3-small".</summary>
    public string EmbeddingModel { get; set; } = "text-embedding-3-small";
}

/// <summary>
/// MongoDB options: cluster name, database name and the batch size used when loading data.
/// </summary>
public class MongoDBConfiguration
{
    /// <summary>Cluster name; empty until configured.</summary>
    public string ClusterName { get; set; } = "";

    /// <summary>Database name; defaults to "Hotels".</summary>
    public string DatabaseName { get; set; } = "Hotels";

    /// <summary>Load batch size; defaults to 100.</summary>
    public int LoadBatchSize { get; set; } = 100;
}

/// <summary>
/// Embedding options: the document field that stores the vector and the vector size.
/// </summary>
public class EmbeddingConfiguration
{
    /// <summary>Name of the embedded field; defaults to "DescriptionVector".</summary>
    public string EmbeddedField { get; set; } = "DescriptionVector";

    /// <summary>Number of dimensions; defaults to 1536.</summary>
    public int Dimensions { get; set; } = 1536;
}

/// <summary>
/// Vector search options: query text, similarity setting and number of results.
/// </summary>
public class VectorSearchConfiguration
{
    /// <summary>Query text; defaults to "luxury hotel near the beach".</summary>
    public string Query { get; set; } = "luxury hotel near the beach";

    /// <summary>Similarity setting; empty by default.</summary>
    public string Similarity { get; set; } = string.Empty;

    /// <summary>Number of results to return; defaults to 5.</summary>
    public int TopK { get; set; } = 5;
}

/// <summary>
/// Data file options.
/// </summary>
public class DataFilesConfiguration
{
    /// <summary>Path of the data file with vectors; defaults to "data/Hotels_Vector.json".</summary>
    public string WithVectors { get; set; } = "data/Hotels_Vector.json";
}
19 changes: 19 additions & 0 deletions ai/select-algorithm-dotnet/Models/HotelData.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
using MongoDB.Bson;
using MongoDB.Bson.Serialization.Attributes;

namespace SelectAlgorithm.Models;

/// <summary>
/// Typed view of a hotel document: a few named fields plus a catch-all for everything else.
/// </summary>
public class HotelData
{
    /// <summary>Document id, represented as an ObjectId in BSON and as a string here.</summary>
    [BsonId, BsonRepresentation(BsonType.ObjectId)]
    public string? Id { get; set; }

    /// <summary>Hotel identifier; empty by default.</summary>
    public string HotelId { get; set; } = "";

    /// <summary>Hotel name; empty by default.</summary>
    public string HotelName { get; set; } = "";

    /// <summary>Hotel description; empty by default.</summary>
    public string Description { get; set; } = "";

    /// <summary>Hotel category; empty by default.</summary>
    public string Category { get; set; } = "";

    /// <summary>Receives any document elements that have no matching property above.</summary>
    [BsonExtraElements]
    public BsonDocument? ExtraElements { get; set; }
}
Loading
Loading