From 5480eef493d7293afd05821b613d305d7c316ddc Mon Sep 17 00:00:00 2001
From: Alex Gaetano Padula
Date: Fri, 20 Feb 2026 09:03:02 -0500
Subject: [PATCH] issue #38 addition of range cost estimation api

---
 CMakeLists.txt              |  2 +-
 include/tidesdb/tidesdb.hpp | 17 +++++++
 src/tidesdb.cpp             | 19 ++++++++
 tests/tidesdb_test.cpp      | 94 +++++++++++++++++++++++++++++++++++++
 4 files changed, 131 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 015afa6..9a90c16 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.16)
-project(tidesdb_cpp VERSION 2.3.1 LANGUAGES CXX)
+project(tidesdb_cpp VERSION 2.3.2 LANGUAGES CXX)
 
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
diff --git a/include/tidesdb/tidesdb.hpp b/include/tidesdb/tidesdb.hpp
index f91d4fa..e40aabf 100644
--- a/include/tidesdb/tidesdb.hpp
+++ b/include/tidesdb/tidesdb.hpp
@@ -305,6 +305,23 @@ class ColumnFamily
      */
     [[nodiscard]] bool isCompacting() const;
 
+    /**
+     * @brief Estimate the computational cost of iterating between two keys
+     * @param keyA First key (bound of range)
+     * @param keyB Second key (bound of range)
+     * @return Estimated traversal cost (higher = more expensive, relative scalar)
+     */
+    [[nodiscard]] double rangeCost(std::string_view keyA, std::string_view keyB) const;
+
+    /**
+     * @brief Estimate the computational cost of iterating between two keys (byte vector overload)
+     * @param keyA First key (bound of range)
+     * @param keyB Second key (bound of range)
+     * @return Estimated traversal cost (higher = more expensive, relative scalar)
+     */
+    [[nodiscard]] double rangeCost(const std::vector<uint8_t>& keyA,
+                                   const std::vector<uint8_t>& keyB) const;
+
     /**
      * @brief Update runtime-safe configuration settings
      * @param config New configuration (only runtime-safe fields are applied)
diff --git a/src/tidesdb.cpp b/src/tidesdb.cpp
index 35b0c46..7233fe4 100644
--- a/src/tidesdb.cpp
+++ b/src/tidesdb.cpp
@@ -273,6 +273,25 @@ bool ColumnFamily::isCompacting() const
     return tidesdb_is_compacting(cf_) != 0;
 }
 
+double ColumnFamily::rangeCost(std::string_view keyA, std::string_view keyB) const
+{
+    double cost = 0.0;
+    int result =
+        tidesdb_range_cost(cf_, reinterpret_cast<const uint8_t*>(keyA.data()), keyA.size(),
+                           reinterpret_cast<const uint8_t*>(keyB.data()), keyB.size(), &cost);
+    checkResult(result, "failed to estimate range cost");
+    return cost;
+}
+
+double ColumnFamily::rangeCost(const std::vector<uint8_t>& keyA,
+                               const std::vector<uint8_t>& keyB) const
+{
+    double cost = 0.0;
+    int result = tidesdb_range_cost(cf_, keyA.data(), keyA.size(), keyB.data(), keyB.size(), &cost);
+    checkResult(result, "failed to estimate range cost");
+    return cost;
+}
+
 void ColumnFamily::updateRuntimeConfig(const ColumnFamilyConfig& config, bool persistToDisk)
 {
     tidesdb_column_family_config_t cConfig;
diff --git a/tests/tidesdb_test.cpp b/tests/tidesdb_test.cpp
index 9b06425..ca9a9d7 100644
--- a/tests/tidesdb_test.cpp
+++ b/tests/tidesdb_test.cpp
@@ -934,6 +934,100 @@ TEST_F(TidesDBTest, CloneColumnFamilyErrors)
     EXPECT_THROW(db.cloneColumnFamily("existing_cf", "existing_cf"), tidesdb::Exception);
 }
 
+TEST_F(TidesDBTest, RangeCost)
+{
+    tidesdb::TidesDB db(getConfig());
+
+    auto cfConfig = tidesdb::ColumnFamilyConfig::defaultConfig();
+    cfConfig.writeBufferSize = 1024;  // Small buffer to trigger flush
+    db.createColumnFamily("test_cf", cfConfig);
+
+    auto cf = db.getColumnFamily("test_cf");
+
+    // Insert data across a key range
+    {
+        auto txn = db.beginTransaction();
+        for (int i = 0; i < 100; ++i)
+        {
+            char key[32], value[64];
+            std::snprintf(key, sizeof(key), "user:%04d", i);
+            std::snprintf(value, sizeof(value), "data_%04d", i);
+            txn.put(cf, key, value, -1);
+        }
+        txn.commit();
+    }
+
+    // Flush to create SSTables so range cost has data to estimate
+    cf.flushMemtable();
+
+    // Estimate cost for two ranges
+    double costA = cf.rangeCost("user:0000", "user:0049");
+    double costB = cf.rangeCost("user:0000", "user:0099");
+
+    // Both costs should be non-negative
+    ASSERT_GE(costA, 0.0);
+    ASSERT_GE(costB, 0.0);
+
+    // Larger range should have >= cost than smaller range
+    ASSERT_GE(costB, costA);
+}
+
+TEST_F(TidesDBTest, RangeCostByteVector)
+{
+    tidesdb::TidesDB db(getConfig());
+
+    auto cfConfig = tidesdb::ColumnFamilyConfig::defaultConfig();
+    db.createColumnFamily("test_cf", cfConfig);
+
+    auto cf = db.getColumnFamily("test_cf");
+
+    // Insert some data
+    {
+        auto txn = db.beginTransaction();
+        for (int i = 0; i < 10; ++i)
+        {
+            std::string key = "key" + std::to_string(i);
+            std::string value = "value" + std::to_string(i);
+            txn.put(cf, key, value, -1);
+        }
+        txn.commit();
+    }
+
+    std::vector<uint8_t> keyA = {'k', 'e', 'y', '0'};
+    std::vector<uint8_t> keyB = {'k', 'e', 'y', '9'};
+
+    double cost = cf.rangeCost(keyA, keyB);
+    ASSERT_GE(cost, 0.0);
+}
+
+TEST_F(TidesDBTest, RangeCostKeyOrderIrrelevant)
+{
+    tidesdb::TidesDB db(getConfig());
+
+    auto cfConfig = tidesdb::ColumnFamilyConfig::defaultConfig();
+    db.createColumnFamily("test_cf", cfConfig);
+
+    auto cf = db.getColumnFamily("test_cf");
+
+    // Insert some data
+    {
+        auto txn = db.beginTransaction();
+        for (int i = 0; i < 10; ++i)
+        {
+            std::string key = "key" + std::to_string(i);
+            std::string value = "value" + std::to_string(i);
+            txn.put(cf, key, value, -1);
+        }
+        txn.commit();
+    }
+
+    // Key order should not matter per the C API docs
+    double costAB = cf.rangeCost("key0", "key9");
+    double costBA = cf.rangeCost("key9", "key0");
+
+    ASSERT_DOUBLE_EQ(costAB, costBA);
+}
+
 TEST_F(TidesDBTest, TransactionReset)
 {
     tidesdb::TidesDB db(getConfig());