Skip to content

Commit c7c9e3f

Browse files
Merge pull request #81 from Build5Nines/dev
v2.1.3
2 parents d36a5d9 + 42d82a9 commit c7c9e3f

8 files changed

Lines changed: 208 additions & 2 deletions

File tree

src/Build5Nines.SharpVector.OpenAI/Embeddings/OpenAIEmbeddingsGenerator.cs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
using Build5Nines.SharpVector.Embeddings;
22
using OpenAI.Embeddings;
3+
using System.Collections.Generic;
4+
using System.Linq;
35

46
namespace Build5Nines.SharpVector.OpenAI.Embeddings;
57

6-
public class OpenAIEmbeddingsGenerator : IEmbeddingsGenerator
8+
public class OpenAIEmbeddingsGenerator : IEmbeddingsGenerator //IBatchEmbeddingsGenerator
79
{
810
protected EmbeddingClient EmbeddingClient { get; private set; }
911

@@ -18,4 +20,29 @@ public async Task<float[]> GenerateEmbeddingsAsync(string text)
1820
var vector = embedding.ToFloats();
1921
return vector.ToArray();
2022
}
23+
24+
/// <summary>
25+
/// Generates embeddings for a batch of input texts using the OpenAI embeddings client.
26+
/// This leverages the API's multi-input batching for improved throughput and reduced overhead.
27+
/// </summary>
28+
/// <param name="texts">Collection of non-empty texts to embed.</param>
29+
/// <returns>A list of float vectors aligned to the input order.</returns>
30+
public async Task<IReadOnlyList<float[]>> GenerateEmbeddingsAsync(IEnumerable<string> texts)
31+
{
32+
if (texts is null) throw new ArgumentNullException(nameof(texts));
33+
34+
var inputs = texts.ToList();
35+
if (inputs.Count == 0)
36+
{
37+
return Array.Empty<float[]>();
38+
}
39+
40+
// Call the batch embeddings API once for all inputs.
41+
var batchResult = await EmbeddingClient.GenerateEmbeddingsAsync(inputs);
42+
43+
// Map the embeddings to float arrays while preserving order.
44+
var vectors = batchResult.Value.Select(e => e.ToFloats().ToArray()).ToList();
45+
46+
return vectors;
47+
}
2148
}

src/Build5Nines.SharpVector/Build5Nines.SharpVector.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
<PackageId>Build5Nines.SharpVector</PackageId>
1010
<PackageProjectUrl>https://sharpvector.build5nines.com</PackageProjectUrl>
1111
<RepositoryUrl>https://github.com/Build5Nines/SharpVector</RepositoryUrl>
12-
<Version>2.1.2</Version>
12+
<Version>2.1.3</Version>
1313
<Description>Lightweight In-memory Vector Database to embed in any .NET Applications</Description>
1414
<Copyright>Copyright (c) 2025 Build5Nines LLC</Copyright>
1515
<PackageReadmeFile>README.md</PackageReadmeFile>
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
namespace Build5Nines.SharpVector.Embeddings;
2+
3+
/// <summary>
4+
/// Optional capability for embeddings generators to support batch embedding of multiple texts.
5+
/// Implementations can leverage provider APIs that accept multi-input requests for better performance.
6+
/// </summary>
7+
public interface IBatchEmbeddingsGenerator : IEmbeddingsGenerator
8+
{
9+
/// <summary>
10+
/// Generates embeddings for multiple input texts in a single call when supported.
11+
/// </summary>
12+
/// <param name="texts">Collection of texts to embed. Order should be preserved in output.</param>
13+
/// <returns>A read-only list of embeddings vectors corresponding to the input order.</returns>
14+
Task<IReadOnlyList<float[]>> GenerateEmbeddingsAsync(IEnumerable<string> texts);
15+
}

src/Build5Nines.SharpVector/IVectorDatabase.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,13 @@ public interface IVectorDatabase<TId, TMetadata, TDocument>
2828
/// <returns>The ID of the added text.</returns>
2929
Task<TId> AddTextAsync(TDocument text, TMetadata? metadata = default(TMetadata));
3030

31+
/// <summary>
32+
/// Adds multiple texts with metadata to the database and returns their IDs.
33+
/// </summary>
34+
/// <param name="items">The texts and metadata to add in batch.</param>
35+
/// <returns>The IDs of the added texts.</returns>
36+
Task<IReadOnlyList<TId>> AddTextsAsync(IEnumerable<(TDocument text, TMetadata? metadata)> items);
37+
3138
/// <summary>
3239
/// Get all the Ids for each text in the database.
3340
/// </summary>

src/Build5Nines.SharpVector/MemoryVectorDatabaseBase.cs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,22 @@ public IEnumerable<TId> GetIds()
107107
return id;
108108
}
109109

110+
public async Task<IReadOnlyList<TId>> AddTextsAsync(IEnumerable<(TVocabularyKey text, TMetadata? metadata)> items)
111+
{
112+
if (items is null) throw new ArgumentNullException(nameof(items));
113+
114+
var ids = new List<TId>();
115+
116+
foreach(var item in items)
117+
{
118+
TId id = await AddTextAsync(item.text, item.metadata);
119+
ids.Add(id);
120+
}
121+
122+
return ids;
123+
}
124+
125+
110126
/// <summary>
111127
/// Retrieves a text and metadata by its ID
112128
/// </summary>
@@ -469,6 +485,48 @@ public IEnumerable<TId> GetIds()
469485
return id;
470486
}
471487

488+
/// <summary>
489+
/// Adds multiple texts with optional metadata to the database efficiently.
490+
/// If the embeddings generator supports batching, this will generate vectors in a single multi-input call.
491+
/// </summary>
492+
/// <param name="items">Collection of (text, metadata) tuples to add.</param>
493+
/// <returns>List of generated IDs in the same order as inputs.</returns>
494+
public async Task<IReadOnlyList<TId>> AddTextsAsync(IEnumerable<(string text, TMetadata? metadata)> items)
495+
{
496+
if (items is null) throw new ArgumentNullException(nameof(items));
497+
498+
var list = items.ToList();
499+
if (list.Count == 0) return Array.Empty<TId>();
500+
501+
// Try batch embeddings if supported
502+
float[][] vectors;
503+
if (EmbeddingsGenerator is IBatchEmbeddingsGenerator batchGen)
504+
{
505+
var batch = await batchGen.GenerateEmbeddingsAsync(list.Select(i => i.text));
506+
vectors = batch.Select(v => v.ToArray()).ToArray();
507+
}
508+
else
509+
{
510+
// Fallback to per-item embedding
511+
vectors = new float[list.Count][];
512+
for (int i = 0; i < list.Count; i++)
513+
{
514+
vectors[i] = await EmbeddingsGenerator.GenerateEmbeddingsAsync(list[i].text);
515+
}
516+
}
517+
518+
// Store items and produce IDs
519+
var ids = new List<TId>(list.Count);
520+
for (int i = 0; i < list.Count; i++)
521+
{
522+
TId id = _idGenerator.NewId();
523+
ids.Add(id);
524+
await VectorStore.SetAsync(id, new VectorTextItem<TMetadata>(list[i].text, list[i].metadata, vectors[i]));
525+
}
526+
527+
return ids;
528+
}
529+
472530
/// <summary>
473531
/// Retrieves a text and metadata by its ID
474532
/// </summary>

src/OpenAIConsoleTest/OpenAIConsoleTest.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
</ItemGroup>
1515

1616
<ItemGroup>
17+
<!-- <ProjectReference Include="..\Build5Nines.SharpVector\Build5Nines.SharpVector.csproj" /> -->
1718
<ProjectReference Include="..\Build5Nines.SharpVector.OpenAI\Build5Nines.SharpVector.OpenAI.csproj" />
1819
</ItemGroup>
1920

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
namespace SharpVectorTest;
2+
3+
using System.Linq;
4+
using System.Threading.Tasks;
5+
using Build5Nines.SharpVector;
6+
using Build5Nines.SharpVector.Embeddings;
7+
using Build5Nines.SharpVector.Id;
8+
using Build5Nines.SharpVector.VectorCompare;
9+
using Build5Nines.SharpVector.VectorStore;
10+
11+
[TestClass]
12+
public class BatchAddTests
13+
{
14+
[TestMethod]
15+
public async Task AddTextsAsync_UsesBatchEmbeddings_WhenAvailable()
16+
{
17+
var db = new BatchMockMemoryVectorDatabase();
18+
19+
var inputs = new (string text, string? metadata)[]
20+
{
21+
("one", "m1"),
22+
("two", "m2"),
23+
("three", "m3")
24+
};
25+
26+
var ids = await db.AddTextsAsync(inputs);
27+
28+
Assert.AreEqual(3, ids.Count);
29+
30+
var results = db.Search("one");
31+
Assert.AreEqual(3, results.Texts.Count());
32+
33+
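// Check that every stored vector has the mock's length of 5. Note: both the single-text and batch mock methods return length-5 vectors, so this check alone does not prove the batch path ran (the batch mock's first value is 0.9f, the single-text mock's is 0.1f).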
34+
foreach (var item in db)
35+
{
36+
Assert.AreEqual(5, item.Vector.Length);
37+
}
38+
}
39+
}
40+
41+
public class BatchMockMemoryVectorDatabase
42+
: MemoryVectorDatabaseBase<
43+
int,
44+
string,
45+
MemoryDictionaryVectorStore<int, string>,
46+
IntIdGenerator,
47+
CosineSimilarityVectorComparer
48+
>
49+
{
50+
public BatchMockMemoryVectorDatabase()
51+
: base(
52+
new MockBatchEmbeddingsGenerator(),
53+
new MemoryDictionaryVectorStore<int, string>()
54+
)
55+
{ }
56+
}
57+
58+
public class MockBatchEmbeddingsGenerator : IEmbeddingsGenerator, IBatchEmbeddingsGenerator
59+
{
60+
#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously
61+
public async Task<float[]> GenerateEmbeddingsAsync(string text)
62+
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
63+
{
64+
return new float[] { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f };
65+
}
66+
67+
#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously
68+
public async Task<IReadOnlyList<float[]>> GenerateEmbeddingsAsync(IEnumerable<string> texts)
69+
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
70+
{
71+
// Return a different first value (0.9f instead of 0.1f) so vectors produced by the batched path can be told apart from the single-text path if needed
72+
return texts.Select((t, idx) => new float[] { 0.9f, 0.2f, 0.3f, 0.4f, 0.5f }).ToList();
73+
}
74+
}

src/SharpVectorTest/VectorDatabaseTests.cs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,30 @@ public void BasicMemoryVectorDatabase_05()
9898
Assert.AreEqual("metadata2", results.Texts.First().Metadata);
9999
}
100100

101+
[TestMethod]
102+
public void BasicMemoryVectorDatabase_05_Batch()
103+
{
104+
var vdb = new BasicMemoryVectorDatabase();
105+
106+
// Load Vector Database with some sample text
107+
var inputs = new (string text, string? metadata)[]
108+
{
109+
("The 👑 King", "metadata1"),
110+
("It's 🔥 Fire.", "metadata2"),
111+
("No emoji", "metadata3")
112+
};
113+
vdb.AddTextsAsync(inputs).Wait();
114+
115+
var results = vdb.Search("🔥", pageCount: 1);
116+
117+
Assert.AreEqual(1, results.Texts.Count());
118+
Assert.AreEqual(0.5773503184318542, results.Texts.First().Similarity);
119+
Assert.AreEqual("It's 🔥 Fire.", results.Texts.First().Text);
120+
Assert.AreEqual(2, results.Texts.First().Id);
121+
Assert.AreEqual("metadata2", results.Texts.First().Metadata);
122+
}
123+
124+
101125
[TestMethod]
102126
public void BasicMemoryVectorDatabase_06()
103127
{

0 commit comments

Comments
 (0)