Skip to content

Commit 2016e9f

Browse files
authored
feat: rename VectorSearchGlobalIndexResult to ScoredGlobalIndexResult (#124)
1 parent 0a4f4e2 commit 2016e9f

23 files changed

Lines changed: 252 additions & 184 deletions

cmake_modules/ThirdpartyToolchain.cmake

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ macro(build_lucene)
292292
set(LUCENE_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/lucene_ep-install")
293293
set(LUCENE_CMAKE_ARGS
294294
${EP_COMMON_CMAKE_ARGS}
295+
"-DLUCENE_BUILD_SHARED=OFF"
295296
"-DENABLE_TEST=OFF"
296297
"-DCMAKE_C_FLAGS=-pthread"
297298
"-DCMAKE_CXX_FLAGS=-pthread"
@@ -303,7 +304,7 @@ macro(build_lucene)
303304
"-DBoost_THREAD_FOUND=TRUE"
304305
"-DCMAKE_INSTALL_PREFIX=${LUCENE_PREFIX}")
305306

306-
set(LUCENE_LIB "${LUCENE_PREFIX}/lib/liblucene++.so.0")
307+
set(LUCENE_LIB "${LUCENE_PREFIX}/lib/liblucene++.a")
307308
externalproject_add(lucene_ep
308309
${EP_COMMON_OPTIONS}
309310
URL ${LUCENE_SOURCE_URL}
@@ -323,13 +324,14 @@ macro(build_lucene)
323324
# The include directory must exist before it is referenced by a target.
324325
file(MAKE_DIRECTORY "${LUCENE_INCLUDE_DIR}")
325326
include_directories(SYSTEM ${LUCENE_INCLUDE_DIR} ${BOOST_INCLUDE_DIR})
326-
add_library(lucene INTERFACE IMPORTED)
327-
target_include_directories(lucene SYSTEM INTERFACE "${LUCENE_INCLUDE_DIR}")
328-
target_compile_options(lucene INTERFACE -pthread)
327+
add_library(lucene STATIC IMPORTED)
328+
set_target_properties(lucene
329+
PROPERTIES IMPORTED_LOCATION "${LUCENE_LIB}"
330+
INTERFACE_INCLUDE_DIRECTORIES
331+
"${LUCENE_INCLUDE_DIR}")
329332

330333
target_link_libraries(lucene
331-
INTERFACE "${LUCENE_LIB}"
332-
boost_date_time
334+
INTERFACE boost_date_time
333335
boost_filesystem
334336
boost_regex
335337
boost_thread

include/paimon/global_index/bitmap_vector_search_global_index_result.h renamed to include/paimon/global_index/bitmap_scored_global_index_result.h

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2024-present Alibaba Inc.
2+
* Copyright 2026-present Alibaba Inc.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -26,25 +26,24 @@
2626
#include "paimon/visibility.h"
2727

2828
namespace paimon {
29-
/// Represents a vector search global index result that combines a Roaring bitmap of candidate row
29+
/// Represents a scored global index result that combines a Roaring bitmap of candidate row
3030
/// ids with an array of associated relevance scores.
3131
///
32-
/// **Important Ordering Note**: Despite inheriting from VectorSearchGlobalIndexResult, the results
33-
/// are
32+
/// **Important Ordering Note**: As in the base class ScoredGlobalIndexResult, the results are
3433
/// **NOT sorted by score**. Instead, both the bitmap and the score vector are ordered by
3534
/// **ascending row id**. This design enables efficient merging and set operations while preserving
3635
/// row id-to-score mapping.
37-
class PAIMON_EXPORT BitmapVectorSearchGlobalIndexResult : public VectorSearchGlobalIndexResult {
36+
class PAIMON_EXPORT BitmapScoredGlobalIndexResult : public ScoredGlobalIndexResult {
3837
public:
39-
BitmapVectorSearchGlobalIndexResult(RoaringBitmap64&& bitmap, std::vector<float>&& scores)
38+
BitmapScoredGlobalIndexResult(RoaringBitmap64&& bitmap, std::vector<float>&& scores)
4039
: bitmap_(std::move(bitmap)), scores_(std::move(scores)) {
4140
assert(static_cast<size_t>(bitmap_.Cardinality()) == scores_.size());
4241
}
4342

44-
class VectorSearchIterator : public VectorSearchGlobalIndexResult::VectorSearchIterator {
43+
class ScoredIterator : public ScoredGlobalIndexResult::ScoredIterator {
4544
public:
46-
VectorSearchIterator(const RoaringBitmap64* bitmap, RoaringBitmap64::Iterator&& iter,
47-
const float* scores)
45+
ScoredIterator(const RoaringBitmap64* bitmap, RoaringBitmap64::Iterator&& iter,
46+
const float* scores)
4847
: bitmap_(bitmap), iter_(std::move(iter)), scores_(scores) {}
4948

5049
bool HasNext() const override {
@@ -66,8 +65,8 @@ class PAIMON_EXPORT BitmapVectorSearchGlobalIndexResult : public VectorSearchGlo
6665

6766
Result<std::unique_ptr<GlobalIndexResult::Iterator>> CreateIterator() const override;
6867

69-
Result<std::unique_ptr<VectorSearchGlobalIndexResult::VectorSearchIterator>>
70-
CreateVectorSearchIterator() const override;
68+
Result<std::unique_ptr<ScoredGlobalIndexResult::ScoredIterator>> CreateScoredIterator()
69+
const override;
7170

7271
Result<std::shared_ptr<GlobalIndexResult>> And(
7372
const std::shared_ptr<GlobalIndexResult>& other) override;
@@ -90,7 +89,6 @@ class PAIMON_EXPORT BitmapVectorSearchGlobalIndexResult : public VectorSearchGlo
9089
const std::vector<float>& GetScores() const;
9190

9291
private:
93-
// TODO(xinyu.lxy): may use pair<int64_t, float>
9492
RoaringBitmap64 bitmap_;
9593
// ordered by row id
9694
std::vector<float> scores_;

include/paimon/global_index/global_index_reader.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2024-present Alibaba Inc.
2+
* Copyright 2026-present Alibaba Inc.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@
2121
#include <vector>
2222

2323
#include "paimon/global_index/global_index_result.h"
24+
#include "paimon/predicate/full_text_search.h"
2425
#include "paimon/predicate/function_visitor.h"
2526
#include "paimon/predicate/vector_search.h"
2627
#include "paimon/visibility.h"
@@ -40,9 +41,13 @@ class PAIMON_EXPORT GlobalIndexReader : public FunctionVisitor<std::shared_ptr<G
4041
/// VisitVectorSearch performs approximate vector similarity search.
4142
/// @warning `VisitVectorSearch` may return error status when it is incorrectly invoked (e.g.,
4243
/// BitmapGlobalIndexReader call `VisitVectorSearch`).
43-
virtual Result<std::shared_ptr<VectorSearchGlobalIndexResult>> VisitVectorSearch(
44+
virtual Result<std::shared_ptr<ScoredGlobalIndexResult>> VisitVectorSearch(
4445
const std::shared_ptr<VectorSearch>& vector_search) = 0;
4546

47+
/// VisitFullTextSearch performs full text search.
48+
virtual Result<std::shared_ptr<GlobalIndexResult>> VisitFullTextSearch(
49+
const std::shared_ptr<FullTextSearch>& full_text_search) = 0;
50+
4651
/// @return true if the reader is thread-safe; false otherwise.
4752
virtual bool IsThreadSafe() const = 0;
4853

include/paimon/global_index/global_index_result.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2024-present Alibaba Inc.
2+
* Copyright 2026-present Alibaba Inc.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -76,7 +76,7 @@ class PAIMON_EXPORT GlobalIndexResult : public std::enable_shared_from_this<Glob
7676
/// Serializes a GlobalIndexResult object into a byte array.
7777
///
7878
/// @note This method only supports the following concrete implementations:
79-
/// - BitmapVectorSearchGlobalIndexResult
79+
/// - BitmapScoredGlobalIndexResult
8080
/// - BitmapGlobalIndexResult
8181
///
8282
/// @param global_index_result The GlobalIndexResult instance to serialize (must not be null).
@@ -91,7 +91,7 @@ class PAIMON_EXPORT GlobalIndexResult : public std::enable_shared_from_this<Glob
9191
///
9292
/// @note The concrete type of the deserialized object is determined by metadata
9393
/// embedded in the buffer. Currently, only the following types are supported:
94-
/// - BitmapVectorSearchGlobalIndexResult
94+
/// - BitmapScoredGlobalIndexResult
9595
/// - BitmapGlobalIndexResult
9696
///
9797
/// @param buffer Pointer to the serialized byte data (must not be null).
@@ -106,18 +106,18 @@ class PAIMON_EXPORT GlobalIndexResult : public std::enable_shared_from_this<Glob
106106
static constexpr int32_t VERSION = 1;
107107
};
108108

109-
/// Represents the result of a vector search query against a global index.
109+
/// Represents the scored result of a query against a global index.
110110
/// This class encapsulates a set of search candidates (row id + score pairs) and provides
111111
/// an iterator interface to traverse them.
112-
class PAIMON_EXPORT VectorSearchGlobalIndexResult : public GlobalIndexResult {
112+
class PAIMON_EXPORT ScoredGlobalIndexResult : public GlobalIndexResult {
113113
public:
114-
/// An iterator over the vector search results, returning (row_id, score) pairs.
114+
/// An iterator over the scored results, returning (row_id, score) pairs.
115115
///
116116
/// @note The results are **NOT sorted by score**. Instead, they are returned in **ascending
117117
/// order of row_id**.
118-
class VectorSearchIterator {
118+
class ScoredIterator {
119119
public:
120-
virtual ~VectorSearchIterator() = default;
120+
virtual ~ScoredIterator() = default;
121121

122122
/// Checks whether more row ids are available.
123123
virtual bool HasNext() const = 0;
@@ -132,7 +132,7 @@ class PAIMON_EXPORT VectorSearchGlobalIndexResult : public GlobalIndexResult {
132132
virtual std::pair<int64_t, float> NextWithScore() = 0;
133133
};
134134

135-
/// Creates a new iterator for traversing the vector search results.
136-
virtual Result<std::unique_ptr<VectorSearchIterator>> CreateVectorSearchIterator() const = 0;
135+
/// Creates a new iterator for traversing the scored results.
136+
virtual Result<std::unique_ptr<ScoredIterator>> CreateScoredIterator() const = 0;
137137
};
138138
} // namespace paimon

src/paimon/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ set(PAIMON_COMMON_SRCS
5151
common/fs/file_system_factory.cpp
5252
common/global_config.cpp
5353
common/global_index/complete_index_score_batch_reader.cpp
54-
common/global_index/bitmap_vector_search_global_index_result.cpp
54+
common/global_index/bitmap_scored_global_index_result.cpp
5555
common/global_index/bitmap_global_index_result.cpp
5656
common/global_index/global_index_result.cpp
5757
common/global_index/global_indexer_factory.cpp
@@ -359,7 +359,7 @@ if(PAIMON_BUILD_TESTS)
359359
common/global_index/global_index_result_test.cpp
360360
common/global_index/global_indexer_factory_test.cpp
361361
common/global_index/bitmap_global_index_result_test.cpp
362-
common/global_index/bitmap_vector_search_global_index_result_test.cpp
362+
common/global_index/bitmap_scored_global_index_result_test.cpp
363363
common/global_index/bitmap/bitmap_global_index_test.cpp
364364
common/io/byte_array_input_stream_test.cpp
365365
common/io/data_input_output_stream_test.cpp

src/paimon/common/global_index/bitmap_vector_search_global_index_result.cpp renamed to src/paimon/common/global_index/bitmap_scored_global_index_result.cpp

Lines changed: 37 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2024-present Alibaba Inc.
2+
* Copyright 2026-present Alibaba Inc.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
1414
* limitations under the License.
1515
*/
1616

17-
#include "paimon/global_index/bitmap_vector_search_global_index_result.h"
17+
#include "paimon/global_index/bitmap_scored_global_index_result.h"
1818

1919
#include "fmt/format.h"
2020
#include "fmt/ranges.h"
@@ -41,78 +41,76 @@ std::vector<float> GetScoresFromMap(const RoaringBitmap64& bitmap,
4141
return scores;
4242
}
4343
} // namespace
44-
Result<std::unique_ptr<GlobalIndexResult::Iterator>>
45-
BitmapVectorSearchGlobalIndexResult::CreateIterator() const {
44+
Result<std::unique_ptr<GlobalIndexResult::Iterator>> BitmapScoredGlobalIndexResult::CreateIterator()
45+
const {
4646
return std::make_unique<BitmapGlobalIndexResult::Iterator>(&bitmap_, bitmap_.Begin());
4747
}
4848

49-
Result<std::unique_ptr<VectorSearchGlobalIndexResult::VectorSearchIterator>>
50-
BitmapVectorSearchGlobalIndexResult::CreateVectorSearchIterator() const {
51-
return std::make_unique<BitmapVectorSearchGlobalIndexResult::VectorSearchIterator>(
49+
Result<std::unique_ptr<ScoredGlobalIndexResult::ScoredIterator>>
50+
BitmapScoredGlobalIndexResult::CreateScoredIterator() const {
51+
return std::make_unique<BitmapScoredGlobalIndexResult::ScoredIterator>(
5252
&bitmap_, bitmap_.Begin(), scores_.data());
5353
}
5454

55-
Result<std::shared_ptr<GlobalIndexResult>> BitmapVectorSearchGlobalIndexResult::And(
55+
Result<std::shared_ptr<GlobalIndexResult>> BitmapScoredGlobalIndexResult::And(
5656
const std::shared_ptr<GlobalIndexResult>& other) {
57-
auto vector_search_other =
58-
std::dynamic_pointer_cast<BitmapVectorSearchGlobalIndexResult>(other);
59-
if (vector_search_other) {
60-
// If current and other result are both BitmapVectorSearchGlobalIndexResult, return
57+
auto scored_other = std::dynamic_pointer_cast<BitmapScoredGlobalIndexResult>(other);
58+
if (scored_other) {
59+
// If current and other result are both BitmapScoredGlobalIndexResult, return
6160
// BitmapGlobalIndexResult. Erase scores to prevent the same row id with different
6261
// scores in current and other results.
63-
auto supplier = [vector_search_other,
64-
result = std::dynamic_pointer_cast<BitmapVectorSearchGlobalIndexResult>(
62+
auto supplier = [scored_other,
63+
result = std::dynamic_pointer_cast<BitmapScoredGlobalIndexResult>(
6564
shared_from_this())]() -> Result<RoaringBitmap64> {
66-
PAIMON_ASSIGN_OR_RAISE(const RoaringBitmap64* r1, vector_search_other->GetBitmap());
65+
PAIMON_ASSIGN_OR_RAISE(const RoaringBitmap64* r1, scored_other->GetBitmap());
6766
PAIMON_ASSIGN_OR_RAISE(const RoaringBitmap64* r2, result->GetBitmap());
6867
return RoaringBitmap64::And(*r1, *r2);
6968
};
7069
return std::make_shared<BitmapGlobalIndexResult>(supplier);
7170
}
7271
auto bitmap_other = std::dynamic_pointer_cast<BitmapGlobalIndexResult>(other);
7372
if (bitmap_other) {
74-
// If other bitmap is BitmapGlobalIndexResult, return BitmapVectorSearchGlobalIndexResult as
75-
// score must exist in current vector search result.
73+
// If other bitmap is BitmapGlobalIndexResult, return BitmapScoredGlobalIndexResult as
74+
// score must exist in current scored result.
7675
std::map<int64_t, float> id_to_score = CreateIdToScoreMap(bitmap_, scores_);
7776
PAIMON_ASSIGN_OR_RAISE(const RoaringBitmap64* other_bitmap, bitmap_other->GetBitmap());
7877
auto and_bitmap = RoaringBitmap64::And(bitmap_, *other_bitmap);
7978
std::vector<float> and_scores = GetScoresFromMap(and_bitmap, id_to_score);
80-
return std::make_shared<BitmapVectorSearchGlobalIndexResult>(std::move(and_bitmap),
81-
std::move(and_scores));
79+
return std::make_shared<BitmapScoredGlobalIndexResult>(std::move(and_bitmap),
80+
std::move(and_scores));
8281
}
8382
return GlobalIndexResult::And(other);
8483
}
8584

86-
Result<std::shared_ptr<GlobalIndexResult>> BitmapVectorSearchGlobalIndexResult::Or(
85+
Result<std::shared_ptr<GlobalIndexResult>> BitmapScoredGlobalIndexResult::Or(
8786
const std::shared_ptr<GlobalIndexResult>& other) {
88-
auto vector_search_other =
89-
std::dynamic_pointer_cast<BitmapVectorSearchGlobalIndexResult>(other);
90-
if (vector_search_other) {
91-
// If current and other result are both BitmapVectorSearchGlobalIndexResult, return
92-
// BitmapVectorSearchGlobalIndexResult when current and other have has no intersection row
87+
auto scored_other = std::dynamic_pointer_cast<BitmapScoredGlobalIndexResult>(other);
88+
if (scored_other) {
89+
// If current and other result are both BitmapScoredGlobalIndexResult, return
90+
// BitmapScoredGlobalIndexResult when current and other have no intersecting row
9391
// id.
9492
std::map<int64_t, float> id_to_score = CreateIdToScoreMap(bitmap_, scores_);
9593
size_t idx = 0;
96-
for (auto iter = vector_search_other->bitmap_.Begin();
97-
iter != vector_search_other->bitmap_.End(); ++iter, ++idx) {
94+
for (auto iter = scored_other->bitmap_.Begin(); iter != scored_other->bitmap_.End();
95+
++iter, ++idx) {
9896
if (id_to_score.find(*iter) != id_to_score.end()) {
9997
return Status::Invalid(
100-
"not support two BitmapVectorSearchGlobalIndexResult or with same row id");
98+
"not support two BitmapScoredGlobalIndexResult or with same row id");
10199
}
102-
id_to_score[*iter] = vector_search_other->scores_[idx];
100+
id_to_score[*iter] = scored_other->scores_[idx];
103101
}
104-
auto or_bitmap = RoaringBitmap64::Or(bitmap_, vector_search_other->bitmap_);
102+
auto or_bitmap = RoaringBitmap64::Or(bitmap_, scored_other->bitmap_);
105103
std::vector<float> or_scores = GetScoresFromMap(or_bitmap, id_to_score);
106-
return std::make_shared<BitmapVectorSearchGlobalIndexResult>(std::move(or_bitmap),
107-
std::move(or_scores));
104+
return std::make_shared<BitmapScoredGlobalIndexResult>(std::move(or_bitmap),
105+
std::move(or_scores));
108106
}
109107

110108
auto bitmap_other = std::dynamic_pointer_cast<BitmapGlobalIndexResult>(other);
111109
if (bitmap_other) {
112110
// If other bitmap is BitmapGlobalIndexResult, return BitmapGlobalIndexResult as
113111
// score for union row id is unknown.
114112
auto supplier = [bitmap_other,
115-
result = std::dynamic_pointer_cast<BitmapVectorSearchGlobalIndexResult>(
113+
result = std::dynamic_pointer_cast<BitmapScoredGlobalIndexResult>(
116114
shared_from_this())]() -> Result<RoaringBitmap64> {
117115
PAIMON_ASSIGN_OR_RAISE(const RoaringBitmap64* r1, bitmap_other->GetBitmap());
118116
PAIMON_ASSIGN_OR_RAISE(const RoaringBitmap64* r2, result->GetBitmap());
@@ -123,31 +121,30 @@ Result<std::shared_ptr<GlobalIndexResult>> BitmapVectorSearchGlobalIndexResult::
123121
return GlobalIndexResult::Or(other);
124122
}
125123

126-
Result<std::shared_ptr<GlobalIndexResult>> BitmapVectorSearchGlobalIndexResult::AddOffset(
124+
Result<std::shared_ptr<GlobalIndexResult>> BitmapScoredGlobalIndexResult::AddOffset(
127125
int64_t offset) {
128126
PAIMON_ASSIGN_OR_RAISE(const RoaringBitmap64* bitmap, GetBitmap());
129127
RoaringBitmap64 bitmap64;
130128
for (auto iter = bitmap->Begin(); iter != bitmap->End(); ++iter) {
131129
bitmap64.Add(offset + (*iter));
132130
}
133131
auto scores = GetScores();
134-
return std::make_shared<BitmapVectorSearchGlobalIndexResult>(std::move(bitmap64),
135-
std::move(scores));
132+
return std::make_shared<BitmapScoredGlobalIndexResult>(std::move(bitmap64), std::move(scores));
136133
}
137134

138-
Result<bool> BitmapVectorSearchGlobalIndexResult::IsEmpty() const {
135+
Result<bool> BitmapScoredGlobalIndexResult::IsEmpty() const {
139136
return bitmap_.IsEmpty();
140137
}
141138

142-
Result<const RoaringBitmap64*> BitmapVectorSearchGlobalIndexResult::GetBitmap() const {
139+
Result<const RoaringBitmap64*> BitmapScoredGlobalIndexResult::GetBitmap() const {
143140
return &bitmap_;
144141
}
145142

146-
const std::vector<float>& BitmapVectorSearchGlobalIndexResult::GetScores() const {
143+
const std::vector<float>& BitmapScoredGlobalIndexResult::GetScores() const {
147144
return scores_;
148145
}
149146

150-
std::string BitmapVectorSearchGlobalIndexResult::ToString() const {
147+
std::string BitmapScoredGlobalIndexResult::ToString() const {
151148
std::vector<std::string> formatted_scores;
152149
formatted_scores.reserve(scores_.size());
153150
for (const auto& score : scores_) {

0 commit comments

Comments
 (0)