Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/VecSim/algorithms/brute_force/brute_force_multi.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,25 @@ class BruteForceIndex_Multi : public BruteForceIndex<DataType, DistType> {
double getDistanceFrom_Unsafe(labelType label, const void *vector_data) const override;
inline size_t indexLabelCount() const override { return this->labelToIdsLookup.size(); }

// Moves every vector currently stored under old_label to new_label. Only the label <-> id
// bookkeeping changes; the ids themselves are kept.
// Returns 1 on success. Returns 0, leaving the index untouched, when old_label is absent or
// new_label is already in use (the caller should then fall back to delete + add).
// No internal lock is taken (see brute_force_single.h); the caller provides mutual exclusion.
int relabelVector(labelType old_label, labelType new_label) override {
    auto &lookup = this->labelToIdsLookup;
    auto source = lookup.find(old_label);
    if (source == lookup.end() || lookup.find(new_label) != lookup.end()) {
        return 0;
    }
    // Detach the id list so the old entry can be dropped before the new one is created.
    auto moved_ids = std::move(source->second);
    lookup.erase(source);
    for (idType id : moved_ids) {
        this->idToLabelMapping[id] = new_label;
    }
    lookup.emplace(new_label, std::move(moved_ids));
    return 1;
}

inline std::unique_ptr<vecsim_stl::abstract_results_container>
getNewResultsContainer(size_t cap) const override {
return std::unique_ptr<vecsim_stl::abstract_results_container>(
Expand Down
18 changes: 18 additions & 0 deletions src/VecSim/algorithms/brute_force/brute_force_single.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,24 @@ class BruteForceIndex_Single : public BruteForceIndex<DataType, DistType> {
int deleteVectorById(labelType label, idType id) override;
double getDistanceFrom_Unsafe(labelType label, const void *vector_data) const override;

// Re-keys the single vector stored under old_label so that it answers to new_label. The id is
// kept; only labelToIdLookup and idToLabelMapping are updated.
// Returns 1 on success. Returns 0, changing nothing, when old_label is absent or new_label is
// already in use (the caller should then fall back to delete + add).
// The brute-force index has no internal lock; callers (the tiered index under flatIndexGuard, or
// a standalone FLAT index under the search module's spec lock) provide mutual exclusion,
// matching addVector/deleteVector here.
int relabelVector(labelType old_label, labelType new_label) override {
    auto source = labelToIdLookup.find(old_label);
    const bool old_missing = (source == labelToIdLookup.end());
    if (old_missing || labelToIdLookup.find(new_label) != labelToIdLookup.end()) {
        return 0;
    }
    const idType internal_id = source->second;
    // Point the id at its new label, then swap the lookup key.
    this->idToLabelMapping[internal_id] = new_label;
    labelToIdLookup.erase(source);
    labelToIdLookup.emplace(new_label, internal_id);
    return 1;
}

std::unique_ptr<vecsim_stl::abstract_results_container>
getNewResultsContainer(size_t cap) const override {
return std::unique_ptr<vecsim_stl::abstract_results_container>(
Expand Down
11 changes: 11 additions & 0 deletions src/VecSim/algorithms/hnsw/hnsw.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,17 @@ class HNSWIndex : public VecSimIndexAbstract<DataType, DistType>,
void unlockIndexDataGuard() const;
void lockSharedIndexDataGuard() const;
void unlockSharedIndexDataGuard() const;
// Relabeling changes which external label an already-stored vector answers to, without touching
// the graph topology. The internal id is unchanged, so all neighbor edges (which reference
// internal ids) stay valid; only the label<->id mapping and idToMetaData[id].label are updated.
//
// relabelVectorUnsafe: the caller must already hold indexDataGuard (used by the tiered index,
// which holds it while coordinating both tiers).
// relabelVector: takes the exclusive guard itself and then delegates to the unsafe variant.
virtual int relabelVectorUnsafe(labelType old_label, labelType new_label) = 0;
int relabelVector(labelType old_label, labelType new_label) override {
    // Held exclusively for the whole relabel and released when this scope exits.
    const std::unique_lock<std::shared_mutex> exclusive(indexDataGuard);
    const int relabeled = relabelVectorUnsafe(old_label, new_label);
    return relabeled;
}
void lockNodeLinks(idType node_id) const;
void unlockNodeLinks(idType node_id) const;
void lockNodeLinks(ElementGraphData *node_data) const;
Expand Down
17 changes: 17 additions & 0 deletions src/VecSim/algorithms/hnsw/hnsw_multi.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,23 @@ class HNSWIndex_Multi : public HNSWIndex<DataType, DistType> {
return getDistanceFromInternal(label, vector_data);
}
int removeLabel(labelType label) override { return labelLookup.erase(label); }
// Moves all internal ids registered under old_label to new_label (a label may map to several
// internal ids in MULTI mode). Returns 1 on success. Returns 0, changing nothing, when
// old_label is absent or new_label is already in use (the caller should then fall back to
// delete + add).
int relabelVectorUnsafe(labelType old_label, labelType new_label) override {
    auto source = labelLookup.find(old_label);
    if (source == labelLookup.end() || labelLookup.find(new_label) != labelLookup.end()) {
        return 0;
    }
    // Detach the id list so the old entry can be dropped before the new one is created.
    auto moved_ids = std::move(source->second);
    labelLookup.erase(source);
    for (idType id : moved_ids) {
        this->idToMetaData[id].label = new_label;
    }
    labelLookup.emplace(new_label, std::move(moved_ids));
    return 1;
}
};

/**
Expand Down
14 changes: 14 additions & 0 deletions src/VecSim/algorithms/hnsw/hnsw_single.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,20 @@ class HNSWIndex_Single : public HNSWIndex<DataType, DistType> {
return getDistanceFromInternal(label, vector_data);
}
int removeLabel(labelType label) override { return labelLookup.erase(label); }
// Re-keys the single internal id registered under old_label so that it answers to new_label.
// Returns 1 on success. Returns 0, changing nothing, when old_label is absent or new_label is
// already in use (the caller should then fall back to delete + add).
int relabelVectorUnsafe(labelType old_label, labelType new_label) override {
    auto source = labelLookup.find(old_label);
    const bool old_missing = (source == labelLookup.end());
    if (old_missing || labelLookup.find(new_label) != labelLookup.end()) {
        return 0;
    }
    const idType internal_id = source->second;
    // Update the per-id metadata first, then swap the lookup key.
    this->idToMetaData[internal_id].label = new_label;
    labelLookup.erase(source);
    labelLookup.emplace(new_label, internal_id);
    return 1;
}
};

/**
Expand Down
43 changes: 43 additions & 0 deletions src/VecSim/algorithms/hnsw/hnsw_tiered.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ class TieredHNSWIndex : public VecSimTieredIndex<DataType, DistType> {

int addVector(const void *blob, labelType label) override;
int deleteVector(labelType label) override;
int relabelVector(labelType old_label, labelType new_label) override;
size_t getNumMarkedDeleted() const override {
return this->getHNSWIndex()->getNumMarkedDeleted();
}
Expand Down Expand Up @@ -862,6 +863,48 @@ int TieredHNSWIndex<DataType, DistType>::deleteVector(labelType label) {
return num_deleted_vectors;
}

// Relabels old_label to new_label in both tiers of the tiered index.
//
// Returns 1 if at least one tier relabeled a vector, and 0 otherwise (old_label unknown to both
// tiers, or the request was refused because new_label is already in use in the flat tier).
//
// Locking: both exclusive locks are taken in the canonical order (flat then main). Holding both
// prevents a search or the background ingestion worker from observing a half-renamed state, and
// prevents a pending insert job from being executed (ingesting old_label into HNSW) between the
// two tier updates.
//
// The caller is still expected to guarantee that new_label does not already exist in the index.
// NOTE(review): a new_label that exists only in the backend is not detected up front here; the
// backend simply refuses its own relabel in that case. Confirm whether a backend existence check
// should be added.
template <typename DataType, typename DistType>
int TieredHNSWIndex<DataType, DistType>::relabelVector(labelType old_label, labelType new_label) {
    auto *hnsw_index = this->getHNSWIndex();
    int ret = 0;

    this->flatIndexGuard.lock();
    this->lockMainIndexGuard();

    // Refuse before mutating anything if the flat tier (or its pending-job bookkeeping) already
    // knows new_label. Otherwise the flat relabel would be rejected while the jobs and the backend
    // were still re-keyed, leaving the tiers disagreeing, and the re-keyed job list could be
    // dropped by an emplace onto an existing key.
    const bool target_in_use =
        this->frontendIndex->isLabelExists(new_label) ||
        this->labelToInsertJobs.find(new_label) != this->labelToInsertJobs.end();

    if (!target_in_use) {
        // Flat tier: relabel the buffered vector (if present). Only when that succeeded, re-key
        // the pending insert job(s), so a not-yet-ingested vector is later ingested into HNSW
        // under new_label rather than old_label.
        if (this->frontendIndex->isLabelExists(old_label) &&
            this->frontendIndex->relabelVector(old_label, new_label) == 1) {
            ret = 1;
            auto it = this->labelToInsertJobs.find(old_label);
            if (it != this->labelToInsertJobs.end()) {
                for (auto *job : it->second) {
                    job->label = new_label;
                }
                auto jobs = std::move(it->second);
                this->labelToInsertJobs.erase(it);
                // new_label was verified absent above, so this always inserts.
                this->labelToInsertJobs.emplace(new_label, std::move(jobs));
            }
        }

        // Backend (HNSW) tier: we already hold mainIndexGuard, so take the HNSW data guard and
        // use the variant that does not lock, to avoid re-locking it recursively.
        hnsw_index->lockIndexDataGuard();
        if (hnsw_index->relabelVectorUnsafe(old_label, new_label) == 1) {
            ret = 1;
        }
        hnsw_index->unlockIndexDataGuard();
    }

    // Release in reverse acquisition order.
    this->unlockMainIndexGuard();
    this->flatIndexGuard.unlock();
    return ret;
}

// `getDistanceFrom` returns the minimum distance between the given blob and the vector with the
// given label. If the label doesn't exist, the distance will be NaN.
// Therefore, it's better to just call `getDistanceFrom` on both indexes and return the minimum
Expand Down
2 changes: 2 additions & 0 deletions src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTest_bufferLimit_Test)
INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTest_bufferLimitAsync_Test)
INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTest_RangeSearch_Test)
INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTest_parallelRangeSearch_Test)
INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTest_RelabelVectorInBackend_Test)
INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTest_RelabelVectorPendingInFlat_Test)

INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_insertJobAsync_Test)
INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_insertJobAsyncMulti_Test)
Expand Down
4 changes: 4 additions & 0 deletions src/VecSim/vec_sim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,10 @@ extern "C" int VecSimIndex_DeleteVector(VecSimIndex *index, size_t label) {
return index->deleteVector(label);
}

// C API entry point for relabeling: forwards to the index's virtual relabelVector and returns its
// result unchanged. The index pointer is dereferenced without a null check.
extern "C" int VecSimIndex_RelabelVector(VecSimIndex *index, size_t old_label, size_t new_label) {
return index->relabelVector(old_label, new_label);
}

extern "C" double VecSimIndex_GetDistanceFrom_Unsafe(VecSimIndex *index, size_t label,
const void *blob) {
return index->getDistanceFrom_Unsafe(label, blob);
Expand Down
18 changes: 18 additions & 0 deletions src/VecSim/vec_sim.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,24 @@ int VecSimIndex_AddVector(VecSimIndex *index, const void *blob, size_t label);
*/
int VecSimIndex_DeleteVector(VecSimIndex *index, size_t label);

/**
* @brief Change the external label of an already-stored vector from @p old_label to @p new_label
* without re-inserting the vector or modifying the graph. Only the label<->internal-id mapping is
* updated; the internal id and all neighbor edges are left intact, so this is O(1) per stored
* vector and avoids the graph churn of a delete + re-insert.
*
* Use this when re-keying an unchanged vector (e.g. a document whose id changed on update but whose
* vector value did not). For the tiered index this also fixes up any pending insert job so a
* not-yet-ingested vector is ingested under @p new_label.
*
* @param index the index containing the vector.
* @param old_label the current label of the stored vector.
* @param new_label the label to assign. Must not already exist in the index.
* @return 1 if relabeled; 0 if @p old_label was not found or the relabel was refused because
* @p new_label is already in use (caller should fall back to delete + add); or
* VECSIM_RELABEL_NOT_SUPPORTED (-1) if the index type does not support relabeling (caller should
* likewise fall back to delete + add).
*/
int VecSimIndex_RelabelVector(VecSimIndex *index, size_t old_label, size_t new_label);

/**
* @brief Calculate the distance of a vector from an index to a vector. This function assumes that
* the vector fits the index - its type and dimension are the same as the index's, and if the
Expand Down
4 changes: 4 additions & 0 deletions src/VecSim/vec_sim_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ typedef enum { VecSimMetric_L2, VecSimMetric_IP, VecSimMetric_Cosine } VecSimMet
typedef size_t labelType;
typedef unsigned int idType;

// Return value of VecSimIndex_RelabelVector / relabelVector when the index type does not
// implement relabeling and the caller should fall back to delete + add. It is negative so it
// cannot be confused with the 0 (not relabeled) and 1 (relabeled) results.
#define VECSIM_RELABEL_NOT_SUPPORTED (-1)

/**
* @brief Query Runtime raw parameters.
* Use VecSimIndex_ResolveParams to generate VecSimQueryParams from array of VecSimRawParams.
Expand Down
20 changes: 20 additions & 0 deletions src/VecSim/vec_sim_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,26 @@ struct VecSimIndexInterface : public VecsimBaseObject {
*/
virtual int deleteVector(labelType label) = 0;

/**
* @brief Change the external label of an already-stored vector from @c old_label to
* @c new_label, WITHOUT re-inserting the vector or touching the graph topology. This is a
* cheap O(1) (per stored vector) relabel: only the label<->internal-id mapping is updated;
* the internal id and all neighbor edges (which reference internal ids) are left untouched.
*
* Intended for callers that re-key an unchanged vector (e.g. a search module that assigns a
* new document id on update but whose vector value did not change), so the expensive
* delete+re-insert cycle (and the resulting graph churn) can be avoided.
*
* This base implementation does nothing and reports "not supported"; index types that can
* relabel override it.
*
* @param old_label the current label of the stored vector.
* @param new_label the label to assign. Must not already exist in the index.
* @return 1 if the vector was relabeled, 0 if @c old_label was not found or the relabel was
* refused because @c new_label is already in use, and VECSIM_RELABEL_NOT_SUPPORTED (-1) if
* this index type does not implement relabeling. On 0 or -1 the caller should fall back to
* delete + add.
*/
virtual int relabelVector(labelType old_label, labelType new_label) {
return VECSIM_RELABEL_NOT_SUPPORTED;
}

/**
* @brief Calculate the distance of a vector from an index to a vector.
* @param index the index from which the first vector is located, and that defines the distance
Expand Down
25 changes: 25 additions & 0 deletions tests/unit/test_bruteforce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,31 @@ TYPED_TEST(BruteForceTest, brute_force_vector_add_test) {
VecSimIndex_Free(index);
}

// Exercises VecSimIndex_RelabelVector on a brute-force index: a successful rename, a rename of a
// label that no longer exists, and a rename onto a label that is already taken.
TYPED_TEST(BruteForceTest, brute_force_relabel_vector_test) {
    size_t dim = 4;
    BFParams params = {.dim = dim, .metric = VecSimMetric_L2};
    VecSimIndex *index = this->CreateNewIndex(params);

    // Seed the index with a single vector stored under label 1.
    TEST_DATA_T blob[dim];
    GenerateVector<TEST_DATA_T>(blob, dim, 1.7);
    VecSimIndex_AddVector(index, blob, 1);
    ASSERT_EQ(VecSimIndex_IndexSize(index), 1);

    // Successful rename 1 -> 2: the size is unchanged, label 2 resolves to the same vector
    // (distance 0), and label 1 no longer resolves (distance NaN).
    const int renamed = VecSimIndex_RelabelVector(index, 1, 2);
    ASSERT_EQ(renamed, 1);
    ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
    const double dist_new_label = VecSimIndex_GetDistanceFrom_Unsafe(index, 2, blob);
    ASSERT_EQ(dist_new_label, 0);
    const double dist_old_label = VecSimIndex_GetDistanceFrom_Unsafe(index, 1, blob);
    ASSERT_TRUE(std::isnan(dist_old_label));

    // Label 1 is gone, so renaming it reports 0.
    const int renamed_missing = VecSimIndex_RelabelVector(index, 1, 3);
    ASSERT_EQ(renamed_missing, 0);

    // Label 5 is occupied, so renaming 2 -> 5 is refused and label 2 still resolves.
    GenerateAndAddVector<TEST_DATA_T>(index, dim, 5, 5.0);
    const int renamed_onto_existing = VecSimIndex_RelabelVector(index, 2, 5);
    ASSERT_EQ(renamed_onto_existing, 0);
    const double dist_after_refusal = VecSimIndex_GetDistanceFrom_Unsafe(index, 2, blob);
    ASSERT_EQ(dist_after_refusal, 0);

    VecSimIndex_Free(index);
}

TYPED_TEST(BruteForceTest, brute_force_vector_update_test) {
size_t dim = 4;
size_t n = 1;
Expand Down
28 changes: 28 additions & 0 deletions tests/unit/test_hnsw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,34 @@ TYPED_TEST(HNSWTest, hnsw_vector_add_test) {
VecSimIndex_Free(index);
}

// Exercises VecSimIndex_RelabelVector on an HNSW index: a successful rename, a rename of a label
// that no longer exists, and a rename onto a label that is already taken.
TYPED_TEST(HNSWTest, hnsw_relabel_vector_test) {
    size_t dim = 4;
    HNSWParams params = {.dim = dim, .metric = VecSimMetric_L2, .M = 16, .efConstruction = 200};
    VecSimIndex *index = this->CreateNewIndex(params);

    // Seed the index with a single vector stored under label 1.
    TEST_DATA_T blob[dim];
    GenerateVector<TEST_DATA_T>(blob, dim, 1.7);
    VecSimIndex_AddVector(index, blob, 1);
    ASSERT_EQ(VecSimIndex_IndexSize(index), 1);

    // Successful rename 1 -> 2: the size is unchanged, label 2 resolves to the same vector
    // (distance 0), and label 1 no longer resolves (distance NaN).
    const int renamed = VecSimIndex_RelabelVector(index, 1, 2);
    ASSERT_EQ(renamed, 1);
    ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
    const double dist_new_label = VecSimIndex_GetDistanceFrom_Unsafe(index, 2, blob);
    ASSERT_EQ(dist_new_label, 0);
    const double dist_old_label = VecSimIndex_GetDistanceFrom_Unsafe(index, 1, blob);
    ASSERT_TRUE(std::isnan(dist_old_label));

    // Label 1 is gone, so renaming it is a no-op that reports 0.
    const int renamed_missing = VecSimIndex_RelabelVector(index, 1, 3);
    ASSERT_EQ(renamed_missing, 0);

    // Label 5 is occupied, so renaming 2 -> 5 is refused (the caller would fall back to
    // delete + add) and label 2 still resolves to the original vector.
    GenerateAndAddVector<TEST_DATA_T>(index, dim, 5, 5.0);
    const int renamed_onto_existing = VecSimIndex_RelabelVector(index, 2, 5);
    ASSERT_EQ(renamed_onto_existing, 0);
    const double dist_after_refusal = VecSimIndex_GetDistanceFrom_Unsafe(index, 2, blob);
    ASSERT_EQ(dist_after_refusal, 0);

    VecSimIndex_Free(index);
}

TYPED_TEST(HNSWTest, hnsw_blob_sanity_test) {
size_t dim = 4;
size_t bs = 1;
Expand Down
Loading