From 35abcba66868956e308d85129c7b3314e3397882 Mon Sep 17 00:00:00 2001 From: Rafik Saliev Date: Mon, 2 Feb 2026 13:43:12 +0100 Subject: [PATCH 1/5] [SVS] Implement 2-stage backend SVS index initialization Purpose: prevent long time SVS Tiered index lock at initialization time. Logic: First stage: create `svs::...::MutableVamanaIndex` instance with R/O shared lock Second stage: set `svs::...::MutableVamanaIndex` created before under R/W unique lock --- src/VecSim/algorithms/svs/svs.h | 74 +++++++++++++++++++++----- src/VecSim/algorithms/svs/svs_tiered.h | 24 +++++++-- 2 files changed, 81 insertions(+), 17 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs.h b/src/VecSim/algorithms/svs/svs.h index 59b056d3c..7a8091a33 100644 --- a/src/VecSim/algorithms/svs/svs.h +++ b/src/VecSim/algorithms/svs/svs.h @@ -45,6 +45,16 @@ struct SVSIndexBase virtual size_t getThreadPoolCapacity() const = 0; virtual bool isCompressed() const = 0; size_t getNumMarkedDeleted() const { return num_marked_deleted; } + + // Abstract handler to manage SVS implementation instance + // declared to avoid unsafe unique_ptr usage + // Derived SVSIndex class should implement it + struct ImplHandler { + virtual ~ImplHandler() = default; + }; + virtual std::unique_ptr createImpl(const void *vectors_data, + const labelType *labels, size_t n) = 0; + virtual void setImpl(std::unique_ptr impl) = 0; #ifdef BUILD_TESTS virtual svs::logging::logger_ptr getLogger() const = 0; #endif @@ -144,7 +154,8 @@ class SVSIndex : public VecSimIndexAbstract, fl // Create SVS index instance with initial data // Data should not be empty template - void initImpl(const Dataset &points, std::span ids) { + std::unique_ptr initImpl(const Dataset &points, + std::span ids) const { svs::threads::ThreadPoolHandle threadpool_handle{VecSimSVSThreadPool{threadpool_}}; // Construct SVS index initial storage with compression if needed @@ -160,25 +171,26 @@ class SVSIndex : public VecSimIndexAbstract, fl // Construct initial Vamana Graph auto graph = - graph_builder_t::build_graph(parameters, data, distance, threadpool_, entry_point, + graph_builder_t::build_graph(parameters, data, distance, threadpool_handle, entry_point, this->blockSize, this->getAllocator(), logger_); // Create SVS MutableIndex instance - impl_ = std::make_unique(std::move(graph), std::move(data), entry_point, - std::move(distance), ids, threadpool_, logger_); + auto impl = std::make_unique(std::move(graph), std::move(data), entry_point, + std::move(distance), ids, threadpool_, logger_); // Set SVS MutableIndex build parameters to be used in future updates - impl_->set_construction_window_size(parameters.window_size); - impl_->set_max_candidates(parameters.max_candidate_pool_size); - impl_->set_prune_to(parameters.prune_to); - impl_->set_alpha(parameters.alpha); - impl_->set_full_search_history(parameters.use_full_search_history); + impl->set_construction_window_size(parameters.window_size); + impl->set_max_candidates(parameters.max_candidate_pool_size); + impl->set_prune_to(parameters.prune_to); + impl->set_alpha(parameters.alpha); + impl->set_full_search_history(parameters.use_full_search_history); // Configure default search parameters - auto sp = impl_->get_search_parameters(); + auto sp = impl->get_search_parameters(); sp.buffer_config({this->search_window_size, this->search_buffer_capacity}); - impl_->set_search_parameters(sp); - impl_->reset_performance_parameters(); + impl->set_search_parameters(sp); + impl->reset_performance_parameters(); + return impl; } // Preprocess batch of vectors @@ -204,6 +216,42 @@ class SVSIndex : public VecSimIndexAbstract, fl return processed_blob; } + // Handler to manage SVS implementation instance + struct SVSImplHandler : public SVSIndexBase::ImplHandler { + std::unique_ptr impl; + SVSImplHandler(std::unique_ptr impl) : impl{std::move(impl)} {} + }; + + std::unique_ptr createImpl(const void *vectors_data, const labelType *labels, + size_t n) override { + // If no data provided, return empty handler + if (n == 0) { + return std::make_unique(nullptr); + } + + std::span ids(labels, n); + auto processed_blob = this->preprocessForBatchStorage(vectors_data, n); + auto typed_vectors_data = static_cast(processed_blob.get()); + // Wrap data into SVS SimpleDataView for SVS API + auto points = svs::data::SimpleDataView{typed_vectors_data, n, this->dim}; + + return std::make_unique(initImpl(points, ids)); + } + + void setImpl(std::unique_ptr handler) override { + assert(handler); + assert(impl_ == nullptr); // Should be called only on empty impl_ + if (impl_ != nullptr) { + throw ANNEXCEPTION("SVSIndex::setImpl called on non-empty impl_"); + } + + SVSImplHandler *svs_handler = dynamic_cast(handler.get()); + if (!svs_handler) { + throw ANNEXCEPTION("Failed to cast to SVSImplHandler"); + } + this->impl_ = std::move(svs_handler->impl); + } + // Assuming numThreads was updated to reflect the number of available threads before this // function was called. // This function assumes that the caller has already set numThreads to the appropriate value @@ -230,7 +278,7 @@ class SVSIndex : public VecSimIndexAbstract, fl if (!impl_) { // SVS index instance cannot be empty, so we have to construct it at first rows - initImpl(points, ids); + impl_ = initImpl(points, ids); } else { // Add new points to existing SVS index impl_->add_points(points, ids); diff --git a/src/VecSim/algorithms/svs/svs_tiered.h b/src/VecSim/algorithms/svs/svs_tiered.h index 351122367..5f20f69b9 100644 --- a/src/VecSim/algorithms/svs/svs_tiered.h +++ b/src/VecSim/algorithms/svs/svs_tiered.h @@ -672,12 +672,28 @@ class TieredSVSIndex : public VecSimTieredIndex { executeTracingCallback("UpdateJob::before_add_to_svs"); { // lock backend index for writing and add vectors there - std::lock_guard lock(this->mainIndexGuard); + std::shared_lock main_shared_lock(this->mainIndexGuard); auto svs_index = GetSVSIndex(); assert(labels_to_move.size() == vectors_to_move.size() / this->frontendIndex->getDim()); - svs_index->setNumThreads(std::min(availableThreads, labels_to_move.size())); - svs_index->addVectors(vectors_to_move.data(), labels_to_move.data(), - labels_to_move.size()); + if (this->backendIndex->indexSize() == 0) { + // If backend index is empty, we need to initialize it first. + svs_index->setNumThreads(std::min(availableThreads, labels_to_move.size())); + auto impl = svs_index->createImpl(vectors_to_move.data(), labels_to_move.data(), + labels_to_move.size()); + + // Upgrade to unique lock to set the new impl + main_shared_lock.unlock(); + std::lock_guard lock(this->mainIndexGuard); + svs_index->setImpl(std::move(impl)); + } else { + // Backend index is initialized - just add the vectors. + main_shared_lock.unlock(); + std::lock_guard lock(this->mainIndexGuard); + // Upgrade to unique lock to add vectors + svs_index->setNumThreads(std::min(availableThreads, labels_to_move.size())); + svs_index->addVectors(vectors_to_move.data(), labels_to_move.data(), + labels_to_move.size()); + } } executeTracingCallback("UpdateJob::after_add_to_svs"); // clean-up frontend index From 45e6dca09fe27ccbf3c85f0de8e81fcf20e290fd Mon Sep 17 00:00:00 2001 From: Rafik Saliev Date: Thu, 5 Feb 2026 15:56:19 +0100 Subject: [PATCH 2/5] Avoid unique locking for deletion of non-existing vectors from backend index. --- src/VecSim/algorithms/svs/svs.h | 11 ++++++++--- src/VecSim/algorithms/svs/svs_tiered.h | 19 ++++++++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs.h b/src/VecSim/algorithms/svs/svs.h index 7a8091a33..6fb48098a 100644 --- a/src/VecSim/algorithms/svs/svs.h +++ b/src/VecSim/algorithms/svs/svs.h @@ -39,6 +39,7 @@ struct SVSIndexBase virtual ~SVSIndexBase() = default; virtual int addVectors(const void *vectors_data, const labelType *labels, size_t n) = 0; virtual int deleteVectors(const labelType *labels, size_t n) = 0; + virtual bool isLabelExists(labelType label) const = 0; virtual size_t indexStorageSize() const = 0; virtual size_t getNumThreads() const = 0; virtual void setNumThreads(size_t numThreads) = 0; @@ -239,15 +240,15 @@ class SVSIndex : public VecSimIndexAbstract, fl } void setImpl(std::unique_ptr handler) override { - assert(handler); + assert(handler && "SVSIndex::setImpl called with null handler"); assert(impl_ == nullptr); // Should be called only on empty impl_ if (impl_ != nullptr) { - throw ANNEXCEPTION("SVSIndex::setImpl called on non-empty impl_"); + throw std::logic_error("SVSIndex::setImpl called on non-empty impl_"); } SVSImplHandler *svs_handler = dynamic_cast(handler.get()); if (!svs_handler) { - throw ANNEXCEPTION("Failed to cast to SVSImplHandler"); + throw std::logic_error("Failed to cast to SVSImplHandler"); } this->impl_ = std::move(svs_handler->impl); } @@ -538,6 +539,10 @@ class SVSIndex : public VecSimIndexAbstract, fl return deleteVectorsImpl(labels, n); } + bool isLabelExists(labelType label) const override { + return impl_ ? impl_->has_id(label) : false; + } + size_t getNumThreads() const override { return threadpool_.size(); } void setNumThreads(size_t numThreads) override { threadpool_.resize(numThreads); } diff --git a/src/VecSim/algorithms/svs/svs_tiered.h b/src/VecSim/algorithms/svs/svs_tiered.h index 5f20f69b9..f037a82f6 100644 --- a/src/VecSim/algorithms/svs/svs_tiered.h +++ b/src/VecSim/algorithms/svs/svs_tiered.h @@ -817,8 +817,15 @@ class TieredSVSIndex : public VecSimTieredIndex { } } // Remove vector from the backend index if it exists in case of non-MULTI. - std::lock_guard lock(this->mainIndexGuard); - ret -= svs_index->deleteVectors(&label, 1); + auto label_exists = [&]() { + std::shared_lock lock(this->mainIndexGuard); + return svs_index->isLabelExists(label); + }(); + + if (label_exists) { + std::lock_guard lock(this->mainIndexGuard); + ret -= svs_index->deleteVectors(&label, 1); + } } { // Add vector to the frontend index. std::lock_guard lock(this->flatIndexGuard); @@ -903,7 +910,13 @@ class TieredSVSIndex : public VecSimTieredIndex { std::lock_guard lock(this->flatIndexGuard); ret = this->deleteAndRecordSwaps_Unsafe(label); } - { + + label_exists = [&]() { + std::shared_lock lock(this->mainIndexGuard); + return svs_index->isLabelExists(label); + }(); + + if (label_exists) { std::lock_guard lock(this->mainIndexGuard); ret += svs_index->deleteVectors(&label, 1); } From adba42af16a01429423a7834e6ad6b2006fa68ad Mon Sep 17 00:00:00 2001 From: Rafik Saliev Date: Tue, 17 Feb 2026 11:44:27 +0100 Subject: [PATCH 3/5] Simplify/improve SVSIndex::deleteVector() --- src/VecSim/algorithms/svs/svs.h | 24 ++++++++++++------------ src/VecSim/algorithms/svs/svs_tiered.h | 4 ++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs.h b/src/VecSim/algorithms/svs/svs.h index 6fb48098a..7edc0abae 100644 --- a/src/VecSim/algorithms/svs/svs.h +++ b/src/VecSim/algorithms/svs/svs.h @@ -288,6 +288,17 @@ class SVSIndex : public VecSimIndexAbstract, fl return n - deleted_num; } + int deleteVectorImpl(const labelType label) { + if (indexLabelCount() == 0 || !impl_->has_id(label)) { + return 0; + } + + const auto deleted_num = impl_->delete_entries(std::span{&label, 1}); + + this->markIndexUpdate(deleted_num); + return deleted_num; + } + int deleteVectorsImpl(const labelType *labels, size_t n) { if (indexLabelCount() == 0) { return 0; @@ -306,19 +317,8 @@ class SVSIndex : public VecSimIndexAbstract, fl return 0; } - // If entries_to_delete.size() == 1, we should ensure single-threading - const size_t current_num_threads = getNumThreads(); - if (n == 1 && current_num_threads > 1) { - setNumThreads(1); - } - const auto deleted_num = impl_->delete_entries(entries_to_delete); - // Restore multi-threading if needed - if (n == 1 && current_num_threads > 1) { - setNumThreads(current_num_threads); - } - this->markIndexUpdate(deleted_num); return deleted_num; } @@ -533,7 +533,7 @@ class SVSIndex : public VecSimIndexAbstract, fl return addVectorsImpl(vectors_data, labels, n); } - int deleteVector(labelType label) override { return deleteVectorsImpl(&label, 1); } + int deleteVector(labelType label) override { return deleteVectorImpl(label); } int deleteVectors(const labelType *labels, size_t n) override { return deleteVectorsImpl(labels, n); diff --git a/src/VecSim/algorithms/svs/svs_tiered.h b/src/VecSim/algorithms/svs/svs_tiered.h index f037a82f6..ceeece1f3 100644 --- a/src/VecSim/algorithms/svs/svs_tiered.h +++ b/src/VecSim/algorithms/svs/svs_tiered.h @@ -824,7 +824,7 @@ class TieredSVSIndex : public VecSimTieredIndex { if (label_exists) { std::lock_guard lock(this->mainIndexGuard); - ret -= svs_index->deleteVectors(&label, 1); + ret -= this->backendIndex->deleteVector(label); } } { // Add vector to the frontend index. @@ -918,7 +918,7 @@ class TieredSVSIndex : public VecSimTieredIndex { if (label_exists) { std::lock_guard lock(this->mainIndexGuard); - ret += svs_index->deleteVectors(&label, 1); + ret += this->backendIndex->deleteVector(label); } return ret; } From 8f0dd5b79ef0529546e5b6ad4687d02e6f6605d5 Mon Sep 17 00:00:00 2001 From: Rafik Saliev Date: Tue, 17 Feb 2026 13:34:48 +0100 Subject: [PATCH 4/5] Add 2-stage initialization test --- src/VecSim/algorithms/svs/svs.h | 2 - tests/unit/test_svs.cpp | 72 +++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs.h b/src/VecSim/algorithms/svs/svs.h index 7edc0abae..8b03514f0 100644 --- a/src/VecSim/algorithms/svs/svs.h +++ b/src/VecSim/algorithms/svs/svs.h @@ -240,8 +240,6 @@ class SVSIndex : public VecSimIndexAbstract, fl } void setImpl(std::unique_ptr handler) override { - assert(handler && "SVSIndex::setImpl called with null handler"); - assert(impl_ == nullptr); // Should be called only on empty impl_ if (impl_ != nullptr) { throw std::logic_error("SVSIndex::setImpl called on non-empty impl_"); } diff --git a/tests/unit/test_svs.cpp b/tests/unit/test_svs.cpp index 102e2ec28..8dd35dd8e 100644 --- a/tests/unit/test_svs.cpp +++ b/tests/unit/test_svs.cpp @@ -277,6 +277,78 @@ TYPED_TEST(SVSTest, svs_bulk_vectors_add_delete_test) { VecSimIndex_Free(index); } +TYPED_TEST(SVSTest, two_stage_initialization_test) { + size_t n = 256; + size_t k = 11; + const size_t dim = 4; + + SVSParams params = { + .dim = dim, + .metric = VecSimMetric_L2, + /* SVS-Vamana specifics */ + .alpha = 1.2, + .graph_max_degree = 64, + .construction_window_size = 20, + .max_candidate_pool_size = 1024, + .prune_to = 60, + .use_search_history = VecSimOption_ENABLE, + }; + + VecSimIndex *index = this->CreateNewIndex(params); + ASSERT_INDEX(index); + + auto svs_index = this->CastToSVS(index); + + std::vector> v(n); + for (size_t i = 0; i < n; i++) { + GenerateVector(v[i].data(), dim, i); + } + + std::vector ids(n); + std::iota(ids.begin(), ids.end(), 0); + + // 2-stage initialization + // initialization with null should fail + EXPECT_THROW(svs_index->setImpl(nullptr), std::logic_error); + + // initialization with data should succeed + auto impl = svs_index->createImpl(v.data(), ids.data(), n); + svs_index->setImpl(std::move(impl)); + + ASSERT_EQ(VecSimIndex_IndexSize(index), n); + + TEST_DATA_T query[] = {50, 50, 50, 50}; + auto verify_res = [&](size_t id, double score, size_t index) { EXPECT_EQ(id, (index + 45)); }; + runTopKSearchTest(index, query, k, verify_res, nullptr, BY_ID); + + // Try to re-initialize with the same data. + impl = svs_index->createImpl(v.data(), ids.data(), n); + // Should fail because the index is not empty. + EXPECT_THROW(svs_index->setImpl(std::move(impl)), std::logic_error); + + // Index should remain unchanged. + ASSERT_EQ(VecSimIndex_IndexSize(index), n); + runTopKSearchTest(index, query, k, verify_res, nullptr, BY_ID); + + // Delete almost all vectors + const size_t keep_num = 1; + ASSERT_EQ(svs_index->deleteVectors(ids.data(), n - keep_num), n - keep_num); + // setImpl() should fail again because the index is not empty. + impl = svs_index->createImpl(v.data(), ids.data(), n); + EXPECT_THROW(svs_index->setImpl(std::move(impl)), std::logic_error); + + // Delete rest of the vectors - index should be empty now and setImpl() should succeed. + ASSERT_EQ(svs_index->deleteVectors(ids.data() + n - keep_num, keep_num), keep_num); + ASSERT_EQ(VecSimIndex_IndexSize(index), 0); + // Re-initialization should succeed. + impl = svs_index->createImpl(v.data(), ids.data(), n); + svs_index->setImpl(std::move(impl)); + ASSERT_EQ(VecSimIndex_IndexSize(index), n); + runTopKSearchTest(index, query, k, verify_res, nullptr, BY_ID); + + VecSimIndex_Free(index); +} + TYPED_TEST(SVSTest, svs_get_distance) { // Scalar quantization accuracy is insufficient for this test. if (this->isFallbackToSQ()) { From e270c0024722520a67c502030fb5cd3231321455 Mon Sep 17 00:00:00 2001 From: Rafik Saliev Date: Wed, 18 Feb 2026 11:25:37 +0100 Subject: [PATCH 5/5] Resolve "undoing the delete" issue --- src/VecSim/algorithms/svs/svs_tiered.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/VecSim/algorithms/svs/svs_tiered.h b/src/VecSim/algorithms/svs/svs_tiered.h index ceeece1f3..4e0477b8d 100644 --- a/src/VecSim/algorithms/svs/svs_tiered.h +++ b/src/VecSim/algorithms/svs/svs_tiered.h @@ -218,6 +218,10 @@ class TieredSVSIndex : public VecSimTieredIndex { using swap_record = std::tuple; constexpr static size_t SKIP_LABEL = std::numeric_limits::max(); std::vector swaps_journal; + // deleted_labels_journal is used by updateSVSIndex() to track vectors that were deleted from + // Flat index during SVS index updating. The journal contains the deleted labels. These labels + // are used to delete the same vectors from the SVS index at the end of the update. + std::vector deleted_labels_journal; size_t trainingTriggerThreshold; size_t updateTriggerThreshold; @@ -668,6 +672,7 @@ class TieredSVSIndex : public VecSimTieredIndex { } // reset journal to the current frontend index state swaps_journal.clear(); + deleted_labels_journal.clear(); } // release frontend index executeTracingCallback("UpdateJob::before_add_to_svs"); @@ -700,6 +705,18 @@ class TieredSVSIndex : public VecSimTieredIndex { { // lock frontend index for writing and delete moved vectors std::lock_guard lock(this->flatIndexGuard); + // delete vectors from backend index that were deleted from the frontend index during + // the update process. + std::sort(deleted_labels_journal.begin(), deleted_labels_journal.end()); + auto it = std::unique(deleted_labels_journal.begin(), deleted_labels_journal.end()); + deleted_labels_journal.erase(it, deleted_labels_journal.end()); + { + std::lock_guard main_lock(this->mainIndexGuard); + auto svs_index = GetSVSIndex(); + svs_index->deleteVectors(deleted_labels_journal.data(), + deleted_labels_journal.size()); + } + // Apply swaps from journal to labels_to_move to reflect changes made in meanwhile. applySwapsToLabelsArray(labels_to_move, this->swaps_journal); @@ -837,6 +854,7 @@ class TieredSVSIndex : public VecSimTieredIndex { for (auto id : this->frontendIndex->getElementIds(label)) { this->swaps_journal.emplace_back(SKIP_LABEL, id, id); } + deleted_labels_journal.push_back(label); } ret = std::max(ret + ft_ret, 0); // Check frontend index size to determine if an update job schedule is needed. @@ -891,6 +909,7 @@ class TieredSVSIndex : public VecSimTieredIndex { this->swaps_journal.emplace_back(SKIP_LABEL, id, id); } } + deleted_labels_journal.push_back(label); return deleting_ids.size(); }