From 8ec72b63c077bab63ceafdf169794b3189ffbf5f Mon Sep 17 00:00:00 2001 From: ofiryanai Date: Sun, 7 Jun 2026 12:08:59 +0300 Subject: [PATCH 1/4] Add VecSimIndex_RelabelVector to re-key a vector without re-insertion Adds a public C API VecSimIndex_RelabelVector(index, old_label, new_label) that changes a stored vector's external label in place, without touching the graph topology. HNSW neighbor edges reference internal ids, which a relabel leaves unchanged, so only the label<->internal-id mapping and idToMetaData[id].label are updated (O(1) per stored vector). This lets a caller re-key an unchanged vector (e.g. a search module that assigns a new doc id on update but whose vector value did not change) and avoid the delete + re-insert HNSW graph churn. - vec_sim_interface.h: non-pure virtual relabelVector with a NOT_SUPPORTED default, so index types without an implementation (e.g. SVS) are unaffected. - BruteForce single/multi: O(1) label-map relabel (no internal lock; the caller provides mutual exclusion, matching addVector/deleteVector). - HNSW single/multi: relabelVectorUnsafe updates labelLookup + idToMetaData; relabelVector wraps it under an exclusive indexDataGuard. - Tiered HNSW: under flatIndexGuard + mainIndexGuard, relabels the flat copy, rewrites any pending HNSWInsertJob.label and rekeys labelToInsertJobs (so a not-yet-ingested vector is ingested under the new label), then relabels the HNSW backend via the lock-free variant (no recursive lock). - Tests: RelabelVectorInBackend and RelabelVectorPendingInFlat (tiered, covering single/multi and float/double). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../brute_force/brute_force_multi.h | 19 +++++ .../brute_force/brute_force_single.h | 18 +++++ src/VecSim/algorithms/hnsw/hnsw.h | 10 +++ src/VecSim/algorithms/hnsw/hnsw_multi.h | 17 +++++ src/VecSim/algorithms/hnsw/hnsw_single.h | 14 ++++ src/VecSim/algorithms/hnsw/hnsw_tiered.h | 43 ++++++++++++ src/VecSim/vec_sim.cpp | 4 ++ src/VecSim/vec_sim.h | 18 +++++ src/VecSim/vec_sim_common.h | 4 ++ src/VecSim/vec_sim_interface.h | 20 ++++++ tests/unit/test_hnsw_tiered.cpp | 70 +++++++++++++++++++ 11 files changed, 237 insertions(+) diff --git a/src/VecSim/algorithms/brute_force/brute_force_multi.h b/src/VecSim/algorithms/brute_force/brute_force_multi.h index 343faea6b..03876d90c 100644 --- a/src/VecSim/algorithms/brute_force/brute_force_multi.h +++ b/src/VecSim/algorithms/brute_force/brute_force_multi.h @@ -32,6 +32,25 @@ class BruteForceIndex_Multi : public BruteForceIndex { double getDistanceFrom_Unsafe(labelType label, const void *vector_data) const override; inline size_t indexLabelCount() const override { return this->labelToIdsLookup.size(); } + // Relabel all vectors stored under old_label to new_label without moving their data. No internal + // lock (see brute_force_single.h); the caller provides mutual exclusion. 
+ int relabelVector(labelType old_label, labelType new_label) override { + auto it = this->labelToIdsLookup.find(old_label); + if (it == this->labelToIdsLookup.end()) { + return 0; // old_label not found + } + if (this->labelToIdsLookup.find(new_label) != this->labelToIdsLookup.end()) { + return 0; // new_label already exists; caller should fall back to delete + add + } + for (idType id : it->second) { + this->idToLabelMapping[id] = new_label; + } + auto ids = std::move(it->second); + this->labelToIdsLookup.erase(it); + this->labelToIdsLookup.emplace(new_label, std::move(ids)); + return 1; + } + inline std::unique_ptr getNewResultsContainer(size_t cap) const override { return std::unique_ptr( diff --git a/src/VecSim/algorithms/brute_force/brute_force_single.h b/src/VecSim/algorithms/brute_force/brute_force_single.h index 9afe46ed3..5589486de 100644 --- a/src/VecSim/algorithms/brute_force/brute_force_single.h +++ b/src/VecSim/algorithms/brute_force/brute_force_single.h @@ -29,6 +29,24 @@ class BruteForceIndex_Single : public BruteForceIndex { int deleteVectorById(labelType label, idType id) override; double getDistanceFrom_Unsafe(labelType label, const void *vector_data) const override; + // Relabel a stored vector without moving its data. The brute-force index has no internal lock; + // callers (the tiered index under flatIndexGuard, or a standalone FLAT index under the search + // module's spec lock) provide mutual exclusion, matching addVector/deleteVector here. 
+ int relabelVector(labelType old_label, labelType new_label) override { + auto it = labelToIdLookup.find(old_label); + if (it == labelToIdLookup.end()) { + return 0; // old_label not found + } + if (labelToIdLookup.find(new_label) != labelToIdLookup.end()) { + return 0; // new_label already exists; caller should fall back to delete + add + } + idType id = it->second; + labelToIdLookup.erase(it); + labelToIdLookup.emplace(new_label, id); + this->idToLabelMapping[id] = new_label; + return 1; + } + std::unique_ptr getNewResultsContainer(size_t cap) const override { return std::unique_ptr( diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 1a3776fa2..53405e89d 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -250,6 +250,16 @@ class HNSWIndex : public VecSimIndexAbstract, void unlockIndexDataGuard() const; void lockSharedIndexDataGuard() const; void unlockSharedIndexDataGuard() const; + // Relabel an existing vector from old_label to new_label without touching the graph topology. + // The internal id is unchanged, so all neighbor edges (which reference internal ids) stay + // valid; only the label<->id mapping and idToMetaData[id].label are updated. + // relabelVectorUnsafe assumes the caller already holds indexDataGuard (used by the tiered index, + // which holds it while coordinating both tiers); relabelVector takes the exclusive guard itself. 
+ virtual int relabelVectorUnsafe(labelType old_label, labelType new_label) = 0; + int relabelVector(labelType old_label, labelType new_label) override { + std::unique_lock guard(indexDataGuard); + return relabelVectorUnsafe(old_label, new_label); + } void lockNodeLinks(idType node_id) const; void unlockNodeLinks(idType node_id) const; void lockNodeLinks(ElementGraphData *node_data) const; diff --git a/src/VecSim/algorithms/hnsw/hnsw_multi.h b/src/VecSim/algorithms/hnsw/hnsw_multi.h index 50ff1a37d..83a9b54cf 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_multi.h +++ b/src/VecSim/algorithms/hnsw/hnsw_multi.h @@ -121,6 +121,23 @@ class HNSWIndex_Multi : public HNSWIndex { return getDistanceFromInternal(label, vector_data); } int removeLabel(labelType label) override { return labelLookup.erase(label); } + int relabelVectorUnsafe(labelType old_label, labelType new_label) override { + auto it = labelLookup.find(old_label); + if (it == labelLookup.end()) { + return 0; // old_label not found + } + if (labelLookup.find(new_label) != labelLookup.end()) { + return 0; // new_label already exists; caller should fall back to delete + add + } + // A label may map to several internal ids in MULTI mode; relabel all of them. 
+ for (idType id : it->second) { + this->idToMetaData[id].label = new_label; + } + auto ids = std::move(it->second); + labelLookup.erase(it); + labelLookup.emplace(new_label, std::move(ids)); + return 1; + } }; /** diff --git a/src/VecSim/algorithms/hnsw/hnsw_single.h b/src/VecSim/algorithms/hnsw/hnsw_single.h index 61899a142..ee213c528 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_single.h +++ b/src/VecSim/algorithms/hnsw/hnsw_single.h @@ -91,6 +91,20 @@ class HNSWIndex_Single : public HNSWIndex { return getDistanceFromInternal(label, vector_data); } int removeLabel(labelType label) override { return labelLookup.erase(label); } + int relabelVectorUnsafe(labelType old_label, labelType new_label) override { + auto it = labelLookup.find(old_label); + if (it == labelLookup.end()) { + return 0; // old_label not found + } + if (labelLookup.find(new_label) != labelLookup.end()) { + return 0; // new_label already exists; caller should fall back to delete + add + } + idType id = it->second; + labelLookup.erase(it); + labelLookup[new_label] = id; + this->idToMetaData[id].label = new_label; + return 1; + } }; /** diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered.h b/src/VecSim/algorithms/hnsw/hnsw_tiered.h index 4932516d7..ce7945943 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_tiered.h +++ b/src/VecSim/algorithms/hnsw/hnsw_tiered.h @@ -201,6 +201,7 @@ class TieredHNSWIndex : public VecSimTieredIndex { int addVector(const void *blob, labelType label) override; int deleteVector(labelType label) override; + int relabelVector(labelType old_label, labelType new_label) override; size_t getNumMarkedDeleted() const override { return this->getHNSWIndex()->getNumMarkedDeleted(); } @@ -862,6 +863,48 @@ int TieredHNSWIndex::deleteVector(labelType label) { return num_deleted_vectors; } +template +int TieredHNSWIndex::relabelVector(labelType old_label, labelType new_label) { + auto *hnsw_index = this->getHNSWIndex(); + int ret = 0; + + // Take both exclusive locks in the canonical order 
(flat then main). Holding both prevents a + // search or the background ingestion worker from observing a half-renamed state, and prevents a + // pending insert job from being executed (ingesting old_label into HNSW) between the two tier + // updates. The caller must guarantee new_label does not already exist in the index. + this->flatIndexGuard.lock(); + this->lockMainIndexGuard(); + + // Flat tier: relabel the buffered vector (if present) and re-key any pending insert job(s), so + // a not-yet-ingested vector is later ingested into HNSW under new_label rather than old_label. + if (this->frontendIndex->isLabelExists(old_label)) { + if (this->frontendIndex->relabelVector(old_label, new_label) == 1) { + ret = 1; + } + auto it = this->labelToInsertJobs.find(old_label); + if (it != this->labelToInsertJobs.end()) { + for (auto *job : it->second) { + job->label = new_label; + } + auto jobs = std::move(it->second); + this->labelToInsertJobs.erase(it); + this->labelToInsertJobs.emplace(new_label, std::move(jobs)); + } + } + + // Backend (HNSW) tier: we already hold mainIndexGuard, so take the HNSW data guard and use the + // lock-free variant to avoid re-locking it recursively. + hnsw_index->lockIndexDataGuard(); + if (hnsw_index->relabelVectorUnsafe(old_label, new_label) == 1) { + ret = 1; + } + hnsw_index->unlockIndexDataGuard(); + + this->unlockMainIndexGuard(); + this->flatIndexGuard.unlock(); + return ret; +} + // `getDistanceFrom` returns the minimum distance between the given blob and the vector with the // given label. If the label doesn't exist, the distance will be NaN. 
// Therefore, it's better to just call `getDistanceFrom` on both indexes and return the minimum diff --git a/src/VecSim/vec_sim.cpp b/src/VecSim/vec_sim.cpp index ceb64502c..d05d9d23f 100644 --- a/src/VecSim/vec_sim.cpp +++ b/src/VecSim/vec_sim.cpp @@ -180,6 +180,10 @@ extern "C" int VecSimIndex_DeleteVector(VecSimIndex *index, size_t label) { return index->deleteVector(label); } +extern "C" int VecSimIndex_RelabelVector(VecSimIndex *index, size_t old_label, size_t new_label) { + return index->relabelVector(old_label, new_label); +} + extern "C" double VecSimIndex_GetDistanceFrom_Unsafe(VecSimIndex *index, size_t label, const void *blob) { return index->getDistanceFrom_Unsafe(label, blob); diff --git a/src/VecSim/vec_sim.h b/src/VecSim/vec_sim.h index 4958a0a79..77514cee1 100644 --- a/src/VecSim/vec_sim.h +++ b/src/VecSim/vec_sim.h @@ -72,6 +72,24 @@ int VecSimIndex_AddVector(VecSimIndex *index, const void *blob, size_t label); */ int VecSimIndex_DeleteVector(VecSimIndex *index, size_t label); +/** + * @brief Change the external label of an already-stored vector from @p old_label to @p new_label + * without re-inserting the vector or modifying the graph. Only the label<->internal-id mapping is + * updated; the internal id and all neighbor edges are left intact, so this is O(1) per stored + * vector and avoids the graph churn of a delete + re-insert. + * + * Use this when re-keying an unchanged vector (e.g. a document whose id changed on update but whose + * vector value did not). For the tiered index this also fixes up any pending insert job so a + * not-yet-ingested vector is ingested under @p new_label. + * + * @param index the index containing the vector. + * @param old_label the current label of the stored vector. + * @param new_label the label to assign. Must not already exist in the index. 
+ * @return 1 if relabeled, 0 if not (@p old_label not found or @p new_label already exists), or + * VECSIM_RELABEL_NOT_SUPPORTED (-1) if the index type cannot relabel (fall back to delete + add). + */ +int VecSimIndex_RelabelVector(VecSimIndex *index, size_t old_label, size_t new_label); + /** * @brief Calculate the distance of a vector from an index to a vector. This function assumes that * the vector fits the index - its type and dimension are the same as the index's, and if the diff --git a/src/VecSim/vec_sim_common.h b/src/VecSim/vec_sim_common.h index 8696da087..3c13d3e72 100644 --- a/src/VecSim/vec_sim_common.h +++ b/src/VecSim/vec_sim_common.h @@ -89,6 +89,10 @@ typedef enum { VecSimMetric_L2, VecSimMetric_IP, VecSimMetric_Cosine } VecSimMet typedef size_t labelType; typedef unsigned int idType; +// Return value of VecSimIndex_RelabelVector / relabelVector when the index type does not +// implement relabeling and the caller should fall back to delete + add. +#define VECSIM_RELABEL_NOT_SUPPORTED (-1) + /** * @brief Query Runtime raw parameters. * Use VecSimIndex_ResolveParams to generate VecSimQueryParams from array of VecSimRawParams. diff --git a/src/VecSim/vec_sim_interface.h b/src/VecSim/vec_sim_interface.h index 0b627bc57..22d3942e4 100644 --- a/src/VecSim/vec_sim_interface.h +++ b/src/VecSim/vec_sim_interface.h @@ -53,6 +53,26 @@ struct VecSimIndexInterface : public VecsimBaseObject { */ virtual int deleteVector(labelType label) = 0; + /** + * @brief Change the external label of an already-stored vector from @c old_label to + * @c new_label, WITHOUT re-inserting the vector or touching the graph topology. This is a + * cheap O(1) (per stored vector) relabel: only the label<->internal-id mapping is updated; + * the internal id and all neighbor edges (which reference internal ids) are left untouched. + * + * Intended for callers that re-key an unchanged vector (e.g. 
a search module that assigns a + * new document id on update but whose vector value did not change), so the expensive + * delete+re-insert cycle (and the resulting graph churn) can be avoided. + * + * @param old_label the current label of the stored vector. + * @param new_label the label to assign. Must not already exist in the index. + * @return 1 if the vector was relabeled, 0 if nothing was relabeled (@c old_label was not + * found, or @c new_label already exists), and VECSIM_RELABEL_NOT_SUPPORTED (-1) if this index + * type does not implement relabeling (the caller should then fall back to delete + add). + */ + virtual int relabelVector(labelType old_label, labelType new_label) { + return VECSIM_RELABEL_NOT_SUPPORTED; + } + /** * @brief Calculate the distance of a vector from an index to a vector. * @param index the index from which the first vector is located, and that defines the distance diff --git a/tests/unit/test_hnsw_tiered.cpp b/tests/unit/test_hnsw_tiered.cpp index 1898c0aac..df633868e 100644 --- a/tests/unit/test_hnsw_tiered.cpp +++ b/tests/unit/test_hnsw_tiered.cpp @@ -19,6 +19,7 @@ #include "mock_thread_pool.h" #include +#include // Runs the test for all combination of data type(float/double) - label type (single/multi) @@ -134,6 +135,75 @@ TYPED_TEST(HNSWTieredIndexTest, CreateIndexInstance) { ASSERT_EQ(tiered_index->labelToInsertJobs.at(vector_label).size(), 0); } +TYPED_TEST(HNSWTieredIndexTest, RelabelVectorInBackend) { + // Relabel a vector that has already been ingested into the HNSW backend. The relabel must move + // the label to the new value without touching the graph (internal id is stable). 
+ HNSWParams params = {.type = TypeParam::get_index_type(), + .dim = 4, + .metric = VecSimMetric_L2, + .multi = TypeParam::isMulti()}; + VecSimParams hnsw_params = CreateParams(params); + auto mock_thread_pool = tieredIndexMock(); + auto *tiered_index = this->CreateTieredHNSWIndex(hnsw_params, mock_thread_pool); + + size_t dim = tiered_index->backendIndex->getDim(); + TEST_DATA_T vector[dim]; + GenerateVector(vector, dim, 0.5f); + + // Add under label 1, then ingest into HNSW via the background job. + VecSimIndex_AddVector(tiered_index, vector, 1); + mock_thread_pool.thread_iteration(); + ASSERT_EQ(tiered_index->backendIndex->indexSize(), 1); + ASSERT_EQ(tiered_index->frontendIndex->indexSize(), 0); + + // Relabel 1 -> 2. + ASSERT_EQ(VecSimIndex_RelabelVector(tiered_index, 1, 2), 1); + + // Size is unchanged, the vector now answers to label 2, and label 1 is gone. + ASSERT_EQ(tiered_index->indexSize(), 1); + ASSERT_EQ(tiered_index->getDistanceFrom_Unsafe(2, vector), 0); + ASSERT_TRUE(std::isnan(tiered_index->getDistanceFrom_Unsafe(1, vector))); + + // Relabeling a non-existent label is a no-op (returns 0). + ASSERT_EQ(VecSimIndex_RelabelVector(tiered_index, 42, 43), 0); +} + +TYPED_TEST(HNSWTieredIndexTest, RelabelVectorPendingInFlat) { + // Relabel a vector that is still in the flat buffer with a pending ingest job: the buffered + // vector AND the pending insert job must be re-keyed, so the later ingestion lands under the + // new label in HNSW. 
+ HNSWParams params = {.type = TypeParam::get_index_type(), + .dim = 4, + .metric = VecSimMetric_L2, + .multi = TypeParam::isMulti()}; + VecSimParams hnsw_params = CreateParams(params); + auto mock_thread_pool = tieredIndexMock(); + auto *tiered_index = this->CreateTieredHNSWIndex(hnsw_params, mock_thread_pool); + + size_t dim = tiered_index->backendIndex->getDim(); + TEST_DATA_T vector[dim]; + GenerateVector(vector, dim, 0.25f); + + // Add under label 1 (lands in the flat buffer with a pending insert job); do NOT ingest yet. + VecSimIndex_AddVector(tiered_index, vector, 1); + ASSERT_EQ(tiered_index->frontendIndex->indexSize(), 1); + ASSERT_EQ(tiered_index->backendIndex->indexSize(), 0); + ASSERT_EQ(tiered_index->labelToInsertJobs.count(1), 1); + + // Relabel 1 -> 2 while the job is pending. + ASSERT_EQ(VecSimIndex_RelabelVector(tiered_index, 1, 2), 1); + ASSERT_EQ(tiered_index->labelToInsertJobs.count(1), 0); + ASSERT_EQ(tiered_index->labelToInsertJobs.count(2), 1); + ASSERT_EQ(tiered_index->getDistanceFrom_Unsafe(2, vector), 0); + + // Now ingest: the pending job must insert the vector into HNSW under the NEW label 2. + mock_thread_pool.thread_iteration(); + ASSERT_EQ(tiered_index->backendIndex->indexSize(), 1); + ASSERT_EQ(tiered_index->frontendIndex->indexSize(), 0); + ASSERT_EQ(tiered_index->getDistanceFrom_Unsafe(2, vector), 0); + ASSERT_TRUE(std::isnan(tiered_index->getDistanceFrom_Unsafe(1, vector))); +} + TYPED_TEST(HNSWTieredIndexTest, testIndexesAttributes) { // Create TieredHNSW index instance with a mock queue. 
HNSWParams params = {.type = TypeParam::get_index_type(), From 012ea96fc3d480880a042aa52868ec4550af3477 Mon Sep 17 00:00:00 2001 From: ofiryanai Date: Sun, 7 Jun 2026 13:09:44 +0300 Subject: [PATCH 2/4] Apply clang-format (reflow comments to 100-col limit) Co-Authored-By: Claude Opus 4.8 (1M context) --- src/VecSim/algorithms/brute_force/brute_force_multi.h | 4 ++-- src/VecSim/algorithms/hnsw/hnsw.h | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/VecSim/algorithms/brute_force/brute_force_multi.h b/src/VecSim/algorithms/brute_force/brute_force_multi.h index 03876d90c..e5a2cdf50 100644 --- a/src/VecSim/algorithms/brute_force/brute_force_multi.h +++ b/src/VecSim/algorithms/brute_force/brute_force_multi.h @@ -32,8 +32,8 @@ class BruteForceIndex_Multi : public BruteForceIndex { double getDistanceFrom_Unsafe(labelType label, const void *vector_data) const override; inline size_t indexLabelCount() const override { return this->labelToIdsLookup.size(); } - // Relabel all vectors stored under old_label to new_label without moving their data. No internal - // lock (see brute_force_single.h); the caller provides mutual exclusion. + // Relabel all vectors stored under old_label to new_label without moving their data. No + // internal lock (see brute_force_single.h); the caller provides mutual exclusion. int relabelVector(labelType old_label, labelType new_label) override { auto it = this->labelToIdsLookup.find(old_label); if (it == this->labelToIdsLookup.end()) { diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 53405e89d..dcd244e97 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -253,8 +253,9 @@ class HNSWIndex : public VecSimIndexAbstract, // Relabel an existing vector from old_label to new_label without touching the graph topology. 
// The internal id is unchanged, so all neighbor edges (which reference internal ids) stay // valid; only the label<->id mapping and idToMetaData[id].label are updated. - // relabelVectorUnsafe assumes the caller already holds indexDataGuard (used by the tiered index, - // which holds it while coordinating both tiers); relabelVector takes the exclusive guard itself. + // relabelVectorUnsafe assumes the caller already holds indexDataGuard (used by the tiered + // index, which holds it while coordinating both tiers); relabelVector takes the exclusive guard + // itself. virtual int relabelVectorUnsafe(labelType old_label, labelType new_label) = 0; int relabelVector(labelType old_label, labelType new_label) override { std::unique_lock guard(indexDataGuard); From 8cfc79bc2c606403b374e4ca32e3ad17af8c283f Mon Sep 17 00:00:00 2001 From: ofiryanai Date: Sun, 7 Jun 2026 13:18:09 +0300 Subject: [PATCH 3/4] Declare the new relabel tests as index friends The RelabelVectorInBackend / RelabelVectorPendingInFlat tests access the tiered index internals (frontendIndex, backendIndex, labelToInsertJobs), like the other HNSWTieredIndexTest cases. gtest friendship is not inherited, so each generated test class must be friend-declared explicitly. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h b/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h index 21f99f8f5..72f00d044 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h +++ b/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h @@ -39,6 +39,8 @@ INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTest_bufferLimit_Test) INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTest_bufferLimitAsync_Test) INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTest_RangeSearch_Test) INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTest_parallelRangeSearch_Test) +INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTest_RelabelVectorInBackend_Test) +INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTest_RelabelVectorPendingInFlat_Test) INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_insertJobAsync_Test) INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_insertJobAsyncMulti_Test) From c97f78c0ca63f7fe846c155e8b02344c447f1ccb Mon Sep 17 00:00:00 2001 From: ofiryanai Date: Sun, 7 Jun 2026 14:34:06 +0300 Subject: [PATCH 4/4] Add relabel unit tests for non-tiered HNSW and brute-force indexes Covers the standalone relabelVector path (the locking wrapper for HNSW; the no-lock BF path) and the edge cases not exercised by the tiered tests: relabel of a missing label is a no-op (returns 0), and relabel onto an already-existing label is refused (returns 0) leaving the vector unchanged. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/unit/test_bruteforce.cpp | 25 +++++++++++++++++++++++++ tests/unit/test_hnsw.cpp | 28 ++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/tests/unit/test_bruteforce.cpp b/tests/unit/test_bruteforce.cpp index 56307bf88..850715337 100644 --- a/tests/unit/test_bruteforce.cpp +++ b/tests/unit/test_bruteforce.cpp @@ -56,6 +56,31 @@ TYPED_TEST(BruteForceTest, brute_force_vector_add_test) { VecSimIndex_Free(index); } +TYPED_TEST(BruteForceTest, brute_force_relabel_vector_test) { + size_t dim = 4; + BFParams params = {.dim = dim, .metric = VecSimMetric_L2}; + VecSimIndex *index = this->CreateNewIndex(params); + + TEST_DATA_T vec[dim]; + GenerateVector(vec, dim, 1.7); + VecSimIndex_AddVector(index, vec, 1); + ASSERT_EQ(VecSimIndex_IndexSize(index), 1); + + // Relabel in place: size unchanged, vector answers to the new label, old label gone. + ASSERT_EQ(VecSimIndex_RelabelVector(index, 1, 2), 1); + ASSERT_EQ(VecSimIndex_IndexSize(index), 1); + ASSERT_EQ(VecSimIndex_GetDistanceFrom_Unsafe(index, 2, vec), 0); + ASSERT_TRUE(std::isnan(VecSimIndex_GetDistanceFrom_Unsafe(index, 1, vec))); + + // No-op for a missing label; refused onto an existing label. 
+ ASSERT_EQ(VecSimIndex_RelabelVector(index, 1, 3), 0); + GenerateAndAddVector(index, dim, 5, 5.0); + ASSERT_EQ(VecSimIndex_RelabelVector(index, 2, 5), 0); + ASSERT_EQ(VecSimIndex_GetDistanceFrom_Unsafe(index, 2, vec), 0); + + VecSimIndex_Free(index); +} + TYPED_TEST(BruteForceTest, brute_force_vector_update_test) { size_t dim = 4; size_t n = 1; diff --git a/tests/unit/test_hnsw.cpp b/tests/unit/test_hnsw.cpp index 47a5393b7..6bee5cd72 100644 --- a/tests/unit/test_hnsw.cpp +++ b/tests/unit/test_hnsw.cpp @@ -57,6 +57,34 @@ TYPED_TEST(HNSWTest, hnsw_vector_add_test) { VecSimIndex_Free(index); } +TYPED_TEST(HNSWTest, hnsw_relabel_vector_test) { + size_t dim = 4; + HNSWParams params = {.dim = dim, .metric = VecSimMetric_L2, .M = 16, .efConstruction = 200}; + VecSimIndex *index = this->CreateNewIndex(params); + + TEST_DATA_T vec[dim]; + GenerateVector(vec, dim, 1.7); + VecSimIndex_AddVector(index, vec, 1); + ASSERT_EQ(VecSimIndex_IndexSize(index), 1); + + // Relabel an existing label in place: size is unchanged, the vector answers to the new label, + // and the old label is gone. + ASSERT_EQ(VecSimIndex_RelabelVector(index, 1, 2), 1); + ASSERT_EQ(VecSimIndex_IndexSize(index), 1); + ASSERT_EQ(VecSimIndex_GetDistanceFrom_Unsafe(index, 2, vec), 0); + ASSERT_TRUE(std::isnan(VecSimIndex_GetDistanceFrom_Unsafe(index, 1, vec))); + + // Relabeling a missing label is a no-op. + ASSERT_EQ(VecSimIndex_RelabelVector(index, 1, 3), 0); + + // Relabeling onto an already-existing label is refused (caller falls back to delete + add). + GenerateAndAddVector(index, dim, 5, 5.0); + ASSERT_EQ(VecSimIndex_RelabelVector(index, 2, 5), 0); + ASSERT_EQ(VecSimIndex_GetDistanceFrom_Unsafe(index, 2, vec), 0); // unchanged after the refusal + + VecSimIndex_Free(index); +} + TYPED_TEST(HNSWTest, hnsw_blob_sanity_test) { size_t dim = 4; size_t bs = 1;