From a82b60ef669896046df90690a4037872057724d8 Mon Sep 17 00:00:00 2001 From: Alex Gaetano Padula Date: Fri, 20 Feb 2026 10:04:33 -0500 Subject: [PATCH] addition of range cost, updated pyproject --- pyproject.toml | 2 +- src/tidesdb/__init__.py | 2 + src/tidesdb/tidesdb.py | 86 ++++++++++++++++++++++++++++ tests/test_tidesdb.py | 122 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 211 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 67d0a3f..825cf26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "tidesdb" -version = "0.9.2" +version = "0.9.3" description = "Official Python bindings for TidesDB - A high-performance embedded key-value storage engine" readme = "README.md" requires-python = ">=3.10" diff --git a/src/tidesdb/__init__.py b/src/tidesdb/__init__.py index 531b64c..1636984 100644 --- a/src/tidesdb/__init__.py +++ b/src/tidesdb/__init__.py @@ -24,6 +24,7 @@ default_config, default_column_family_config, save_config_to_ini, + load_config_from_ini, COMPARATOR_FUNC, ) @@ -45,5 +46,6 @@ "default_config", "default_column_family_config", "save_config_to_ini", + "load_config_from_ini", "COMPARATOR_FUNC", ] diff --git a/src/tidesdb/tidesdb.py b/src/tidesdb/tidesdb.py index 0a1eb2f..c589634 100644 --- a/src/tidesdb/tidesdb.py +++ b/src/tidesdb/tidesdb.py @@ -400,6 +400,19 @@ class _CCacheStats(Structure): _lib.tidesdb_is_compacting.argtypes = [c_void_p] _lib.tidesdb_is_compacting.restype = c_int +_lib.tidesdb_range_cost.argtypes = [ + c_void_p, + POINTER(c_uint8), + c_size_t, + POINTER(c_uint8), + c_size_t, + POINTER(c_double), +] +_lib.tidesdb_range_cost.restype = c_int + +_lib.tidesdb_cf_config_load_from_ini.argtypes = [c_char_p, c_char_p, POINTER(_CColumnFamilyConfig)] +_lib.tidesdb_cf_config_load_from_ini.restype = c_int + # Comparator function type: int (*)(const uint8_t*, size_t, const uint8_t*, size_t, void*) COMPARATOR_FUNC = ctypes.CFUNCTYPE(c_int, POINTER(c_uint8), c_size_t, POINTER(c_uint8), c_size_t, c_void_p) DESTROY_FUNC = ctypes.CFUNCTYPE(None, c_void_p) @@ -570,6 +583,49 @@ def save_config_to_ini(file_path: str, cf_name: str, config: ColumnFamilyConfig) raise TidesDBError.from_code(result, "failed to save config to INI file") +def load_config_from_ini(file_path: str, cf_name: str) -> ColumnFamilyConfig: + """ + Load column family configuration from an INI file. + + Args: + file_path: Path to the INI file to read + cf_name: Name of the section to read (column family name) + + Returns: + ColumnFamilyConfig populated from the INI file + """ + c_config = _CColumnFamilyConfig() + result = _lib.tidesdb_cf_config_load_from_ini( + file_path.encode("utf-8"), cf_name.encode("utf-8"), ctypes.byref(c_config) + ) + if result != TDB_SUCCESS: + raise TidesDBError.from_code(result, "failed to load config from INI file") + + return ColumnFamilyConfig( + write_buffer_size=c_config.write_buffer_size, + level_size_ratio=c_config.level_size_ratio, + min_levels=c_config.min_levels, + dividing_level_offset=c_config.dividing_level_offset, + klog_value_threshold=c_config.klog_value_threshold, + compression_algorithm=CompressionAlgorithm(c_config.compression_algorithm), + enable_bloom_filter=bool(c_config.enable_bloom_filter), + bloom_fpr=c_config.bloom_fpr, + enable_block_indexes=bool(c_config.enable_block_indexes), + index_sample_ratio=c_config.index_sample_ratio, + block_index_prefix_len=c_config.block_index_prefix_len, + sync_mode=SyncMode(c_config.sync_mode), + sync_interval_us=c_config.sync_interval_us, + comparator_name=c_config.comparator_name.decode("utf-8").rstrip("\x00"), + skip_list_max_level=c_config.skip_list_max_level, + skip_list_probability=c_config.skip_list_probability, + default_isolation_level=IsolationLevel(c_config.default_isolation_level), + min_disk_space=c_config.min_disk_space, + l1_file_count_trigger=c_config.l1_file_count_trigger, + l0_queue_stall_threshold=c_config.l0_queue_stall_threshold, + use_btree=bool(c_config.use_btree), + ) + + class Iterator: """Iterator for traversing key-value pairs in a column family.""" @@ -742,6 +798,36 @@ def update_runtime_config(self, config: ColumnFamilyConfig, persist_to_disk: boo if result != TDB_SUCCESS: raise TidesDBError.from_code(result, "failed to update runtime config") + def range_cost(self, key_a: bytes, key_b: bytes) -> float: + """ + Estimate the computational cost of iterating between two keys. + + The returned value is an opaque double meaningful only for comparison + with other values from the same function. It uses only in-memory metadata + and performs no disk I/O. Key order does not matter. + + Args: + key_a: First key (bound of range) + key_b: Second key (bound of range) + + Returns: + Estimated traversal cost (higher = more expensive) + + Raises: + TidesDBError: If arguments are invalid (NULL pointers, zero-length keys) + """ + key_a_buf = (c_uint8 * len(key_a)).from_buffer_copy(key_a) if key_a else None + key_b_buf = (c_uint8 * len(key_b)).from_buffer_copy(key_b) if key_b else None + cost = c_double() + + result = _lib.tidesdb_range_cost( + self._cf, key_a_buf, len(key_a), key_b_buf, len(key_b), ctypes.byref(cost) + ) + if result != TDB_SUCCESS: + raise TidesDBError.from_code(result, "failed to estimate range cost") + + return cost.value + def get_stats(self) -> Stats: """Get statistics for this column family.""" stats_ptr = POINTER(_CStats)() diff --git a/tests/test_tidesdb.py b/tests/test_tidesdb.py index a25d72f..6111c21 100644 --- a/tests/test_tidesdb.py +++ b/tests/test_tidesdb.py @@ -589,5 +589,127 @@ def test_checkpoint_independence(self, db, cf, temp_db_path): shutil.rmtree(checkpoint_dir, ignore_errors=True) +class TestRangeCost: + """Tests for range cost estimation.""" + + def test_range_cost_returns_float(self, db, cf): + """Test that range_cost returns a float value.""" + with db.begin_txn() as txn: + txn.put(cf, b"key_a", b"value_a") + txn.put(cf, b"key_z", b"value_z") + txn.commit() + + cost = cf.range_cost(b"key_a", b"key_z") + assert isinstance(cost, float) + assert cost >= 0.0 + + def test_range_cost_empty_cf(self, db, cf): + """Test range_cost on an empty column family.""" + cost = cf.range_cost(b"a", b"z") + assert isinstance(cost, float) + assert cost >= 0.0 + + def test_range_cost_key_order_irrelevant(self, db, cf): + """Test that key order does not matter.""" + with db.begin_txn() as txn: + txn.put(cf, b"aaa", b"1") + txn.put(cf, b"zzz", b"2") + txn.commit() + + cost_ab = cf.range_cost(b"aaa", b"zzz") + cost_ba = cf.range_cost(b"zzz", b"aaa") + assert cost_ab == cost_ba + + def test_range_cost_narrow_vs_wide(self, db, cf): + """Test that a wider range costs at least as much as a narrow one.""" + with db.begin_txn() as txn: + for i in range(50): + txn.put(cf, f"key:{i:04d}".encode(), f"val:{i}".encode()) + txn.commit() + + narrow = cf.range_cost(b"key:0010", b"key:0015") + wide = cf.range_cost(b"key:0000", b"key:0049") + # Wide range should generally cost >= narrow range + assert wide >= narrow + + def test_range_cost_comparison(self, db, cf): + """Test comparing costs of different ranges.""" + with db.begin_txn() as txn: + for i in range(100): + txn.put(cf, f"user:{i:04d}".encode(), f"data:{i}".encode()) + txn.commit() + + cost_a = cf.range_cost(b"user:0000", b"user:0009") + cost_b = cf.range_cost(b"user:0000", b"user:0099") + # Both should be valid floats + assert isinstance(cost_a, float) + assert isinstance(cost_b, float) + + +class TestLoadConfigFromIni: + """Tests for loading column family config from INI files.""" + + def test_save_and_load_roundtrip(self, temp_db_path): + """Test that saving and loading config produces equivalent results.""" + original = tidesdb.default_column_family_config() + original.write_buffer_size = 32 * 1024 * 1024 + original.compression_algorithm = tidesdb.CompressionAlgorithm.ZSTD_COMPRESSION + original.enable_bloom_filter = True + original.bloom_fpr = 0.001 + original.sync_mode = tidesdb.SyncMode.SYNC_FULL + original.min_levels = 7 + original.use_btree = True + + ini_path = os.path.join(temp_db_path, "test_config.ini") + tidesdb.save_config_to_ini(ini_path, "my_cf", original) + + loaded = tidesdb.load_config_from_ini(ini_path, "my_cf") + + assert loaded.write_buffer_size == original.write_buffer_size + assert loaded.compression_algorithm == original.compression_algorithm + assert loaded.enable_bloom_filter == original.enable_bloom_filter + assert abs(loaded.bloom_fpr - original.bloom_fpr) < 1e-9 + assert loaded.sync_mode == original.sync_mode + assert loaded.min_levels == original.min_levels + assert loaded.use_btree == original.use_btree + + def test_load_nonexistent_file_raises(self, temp_db_path): + """Test that loading from a non-existent file raises error.""" + ini_path = os.path.join(temp_db_path, "nonexistent.ini") + with pytest.raises(tidesdb.TidesDBError): + tidesdb.load_config_from_ini(ini_path, "my_cf") + + def test_load_preserves_all_fields(self, temp_db_path): + """Test that all configuration fields survive a save/load roundtrip.""" + original = tidesdb.default_column_family_config() + original.level_size_ratio = 8 + original.dividing_level_offset = 3 + original.klog_value_threshold = 1024 + original.index_sample_ratio = 2 + original.block_index_prefix_len = 32 + original.sync_interval_us = 500000 + original.skip_list_max_level = 16 + original.skip_list_probability = 0.5 + original.min_disk_space = 200 * 1024 * 1024 + original.l1_file_count_trigger = 8 + original.l0_queue_stall_threshold = 30 + + ini_path = os.path.join(temp_db_path, "full_config.ini") + tidesdb.save_config_to_ini(ini_path, "full_cf", original) + + loaded = tidesdb.load_config_from_ini(ini_path, "full_cf") + + assert loaded.level_size_ratio == original.level_size_ratio + assert loaded.dividing_level_offset == original.dividing_level_offset + assert loaded.klog_value_threshold == original.klog_value_threshold + assert loaded.index_sample_ratio == original.index_sample_ratio + assert loaded.block_index_prefix_len == original.block_index_prefix_len + assert loaded.sync_interval_us == original.sync_interval_us + assert loaded.skip_list_max_level == original.skip_list_max_level + assert loaded.min_disk_space == original.min_disk_space + assert loaded.l1_file_count_trigger == original.l1_file_count_trigger + assert loaded.l0_queue_stall_threshold == original.l0_queue_stall_threshold + + if __name__ == "__main__": pytest.main([__file__, "-v"])