Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "tidesdb"
version = "0.9.2"
version = "0.9.3"
description = "Official Python bindings for TidesDB - A high-performance embedded key-value storage engine"
readme = "README.md"
requires-python = ">=3.10"
Expand Down
2 changes: 2 additions & 0 deletions src/tidesdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
default_config,
default_column_family_config,
save_config_to_ini,
load_config_from_ini,
COMPARATOR_FUNC,
)

Expand All @@ -45,5 +46,6 @@
"default_config",
"default_column_family_config",
"save_config_to_ini",
"load_config_from_ini",
"COMPARATOR_FUNC",
]
86 changes: 86 additions & 0 deletions src/tidesdb/tidesdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,19 @@ class _CCacheStats(Structure):
# tidesdb_is_compacting: takes a single opaque pointer and returns a C int.
_lib.tidesdb_is_compacting.argtypes = [c_void_p]
_lib.tidesdb_is_compacting.restype = c_int

# tidesdb_range_cost: the argument order below mirrors the call made by
# range_cost() in this module: column family handle, first key pointer,
# first key length, second key pointer, second key length, and an out-pointer
# that receives the estimated cost as a C double.
_lib.tidesdb_range_cost.argtypes = [
c_void_p,
POINTER(c_uint8),
c_size_t,
POINTER(c_uint8),
c_size_t,
POINTER(c_double),
]
# The int result is a status code; callers compare it against TDB_SUCCESS.
_lib.tidesdb_range_cost.restype = c_int

# tidesdb_cf_config_load_from_ini: called by load_config_from_ini() with the
# UTF-8 encoded file path, the UTF-8 encoded section (column family) name, and
# a pointer to the _CColumnFamilyConfig struct that the library fills in.
_lib.tidesdb_cf_config_load_from_ini.argtypes = [c_char_p, c_char_p, POINTER(_CColumnFamilyConfig)]
# The int result is a status code; callers compare it against TDB_SUCCESS.
_lib.tidesdb_cf_config_load_from_ini.restype = c_int

# Comparator function type: int (*)(const uint8_t*, size_t, const uint8_t*, size_t, void*)
COMPARATOR_FUNC = ctypes.CFUNCTYPE(c_int, POINTER(c_uint8), c_size_t, POINTER(c_uint8), c_size_t, c_void_p)
# Callback type taking one void* and returning nothing: void (*)(void*)
# NOTE(review): presumably the destructor for the comparator's context pointer; confirm against the C header.
DESTROY_FUNC = ctypes.CFUNCTYPE(None, c_void_p)
Expand Down Expand Up @@ -570,6 +583,49 @@ def save_config_to_ini(file_path: str, cf_name: str, config: ColumnFamilyConfig)
raise TidesDBError.from_code(result, "failed to save config to INI file")


def load_config_from_ini(file_path: str, cf_name: str) -> ColumnFamilyConfig:
    """
    Read one column family's settings out of an INI file.

    Args:
        file_path: Location of the INI file to parse
        cf_name: Section to read (named after the column family)

    Returns:
        ColumnFamilyConfig built from the values the native loader produced

    Raises:
        TidesDBError: If the native loader returns anything other than TDB_SUCCESS
    """
    raw = _CColumnFamilyConfig()
    path_arg = file_path.encode("utf-8")
    section_arg = cf_name.encode("utf-8")

    status = _lib.tidesdb_cf_config_load_from_ini(path_arg, section_arg, ctypes.byref(raw))
    if status != TDB_SUCCESS:
        raise TidesDBError.from_code(status, "failed to load config from INI file")

    def as_is(value):
        # Field is handed to ColumnFamilyConfig exactly as ctypes exposes it.
        return value

    def as_text(value):
        # Decode the C string and drop any trailing NUL padding.
        return value.decode("utf-8").rstrip("\x00")

    # (struct field, converter) pairs; converted in this order and passed to
    # ColumnFamilyConfig under the same keyword names.
    field_plan = (
        ("write_buffer_size", as_is),
        ("level_size_ratio", as_is),
        ("min_levels", as_is),
        ("dividing_level_offset", as_is),
        ("klog_value_threshold", as_is),
        ("compression_algorithm", CompressionAlgorithm),
        ("enable_bloom_filter", bool),
        ("bloom_fpr", as_is),
        ("enable_block_indexes", bool),
        ("index_sample_ratio", as_is),
        ("block_index_prefix_len", as_is),
        ("sync_mode", SyncMode),
        ("sync_interval_us", as_is),
        ("comparator_name", as_text),
        ("skip_list_max_level", as_is),
        ("skip_list_probability", as_is),
        ("default_isolation_level", IsolationLevel),
        ("min_disk_space", as_is),
        ("l1_file_count_trigger", as_is),
        ("l0_queue_stall_threshold", as_is),
        ("use_btree", bool),
    )
    converted = {name: convert(getattr(raw, name)) for name, convert in field_plan}
    return ColumnFamilyConfig(**converted)


class Iterator:
"""Iterator for traversing key-value pairs in a column family."""

Expand Down Expand Up @@ -742,6 +798,36 @@ def update_runtime_config(self, config: ColumnFamilyConfig, persist_to_disk: boo
if result != TDB_SUCCESS:
raise TidesDBError.from_code(result, "failed to update runtime config")

def range_cost(self, key_a: bytes, key_b: bytes) -> float:
    """
    Estimate the computational cost of iterating between two keys.

    The returned value is an opaque double meaningful only for comparison
    with other values from the same function. It uses only in-memory metadata
    and performs no disk I/O. Key order does not matter.

    Args:
        key_a: First key (bound of range)
        key_b: Second key (bound of range)

    Returns:
        Estimated traversal cost (higher = more expensive)

    Raises:
        TidesDBError: If arguments are invalid (NULL pointers, zero-length keys)
    """

    def _as_c_key(key):
        # A missing (None) or empty key is forwarded as (NULL, 0) so that the
        # native call reports the invalid argument as a TidesDBError, instead
        # of len(None) raising TypeError before the library is ever reached.
        if not key:
            return None, 0
        return (c_uint8 * len(key)).from_buffer_copy(key), len(key)

    key_a_buf, key_a_len = _as_c_key(key_a)
    key_b_buf, key_b_len = _as_c_key(key_b)
    cost = c_double()  # out-parameter filled in by the native call

    result = _lib.tidesdb_range_cost(
        self._cf, key_a_buf, key_a_len, key_b_buf, key_b_len, ctypes.byref(cost)
    )
    if result != TDB_SUCCESS:
        raise TidesDBError.from_code(result, "failed to estimate range cost")

    return cost.value

def get_stats(self) -> Stats:
"""Get statistics for this column family."""
stats_ptr = POINTER(_CStats)()
Expand Down
122 changes: 122 additions & 0 deletions tests/test_tidesdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,5 +589,127 @@ def test_checkpoint_independence(self, db, cf, temp_db_path):
shutil.rmtree(checkpoint_dir, ignore_errors=True)


class TestRangeCost:
    """Checks for cf.range_cost on empty and populated column families."""

    @staticmethod
    def _commit_pairs(db, cf, pairs):
        """Put every (key, value) pair into cf inside one committed transaction."""
        with db.begin_txn() as txn:
            for key, value in pairs:
                txn.put(cf, key, value)
            txn.commit()

    def test_range_cost_returns_float(self, db, cf):
        """A populated column family yields a non-negative float."""
        self._commit_pairs(db, cf, [(b"key_a", b"value_a"), (b"key_z", b"value_z")])

        estimate = cf.range_cost(b"key_a", b"key_z")
        assert isinstance(estimate, float)
        assert estimate >= 0.0

    def test_range_cost_empty_cf(self, db, cf):
        """An empty column family still yields a non-negative float."""
        estimate = cf.range_cost(b"a", b"z")
        assert isinstance(estimate, float)
        assert estimate >= 0.0

    def test_range_cost_key_order_irrelevant(self, db, cf):
        """Swapping the two bounds gives the same estimate."""
        self._commit_pairs(db, cf, [(b"aaa", b"1"), (b"zzz", b"2")])

        forward = cf.range_cost(b"aaa", b"zzz")
        backward = cf.range_cost(b"zzz", b"aaa")
        assert forward == backward

    def test_range_cost_narrow_vs_wide(self, db, cf):
        """A range spanning all keys costs no less than a small sub-range."""
        self._commit_pairs(
            db,
            cf,
            ((f"key:{i:04d}".encode(), f"val:{i}".encode()) for i in range(50)),
        )

        narrow = cf.range_cost(b"key:0010", b"key:0015")
        wide = cf.range_cost(b"key:0000", b"key:0049")
        # The wide range is expected to cost at least as much as the narrow one
        assert wide >= narrow

    def test_range_cost_comparison(self, db, cf):
        """Estimates for two differently sized ranges are both floats."""
        self._commit_pairs(
            db,
            cf,
            ((f"user:{i:04d}".encode(), f"data:{i}".encode()) for i in range(100)),
        )

        cost_a = cf.range_cost(b"user:0000", b"user:0009")
        cost_b = cf.range_cost(b"user:0000", b"user:0099")
        # Only the result type is checked here, not the relative ordering
        for estimate in (cost_a, cost_b):
            assert isinstance(estimate, float)


class TestLoadConfigFromIni:
    """Tests for loading column family config from INI files."""

    def test_save_and_load_roundtrip(self, temp_db_path):
        """Test that saving and loading config produces equivalent results."""
        original = tidesdb.default_column_family_config()
        original.write_buffer_size = 32 * 1024 * 1024
        original.compression_algorithm = tidesdb.CompressionAlgorithm.ZSTD_COMPRESSION
        original.enable_bloom_filter = True
        original.bloom_fpr = 0.001
        original.sync_mode = tidesdb.SyncMode.SYNC_FULL
        original.min_levels = 7
        original.use_btree = True

        ini_path = os.path.join(temp_db_path, "test_config.ini")
        tidesdb.save_config_to_ini(ini_path, "my_cf", original)

        loaded = tidesdb.load_config_from_ini(ini_path, "my_cf")

        assert loaded.write_buffer_size == original.write_buffer_size
        assert loaded.compression_algorithm == original.compression_algorithm
        assert loaded.enable_bloom_filter == original.enable_bloom_filter
        # Floating-point field: compare with a tolerance rather than exactly
        assert abs(loaded.bloom_fpr - original.bloom_fpr) < 1e-9
        assert loaded.sync_mode == original.sync_mode
        assert loaded.min_levels == original.min_levels
        assert loaded.use_btree == original.use_btree

    def test_load_nonexistent_file_raises(self, temp_db_path):
        """Test that loading from a non-existent file raises error."""
        ini_path = os.path.join(temp_db_path, "nonexistent.ini")
        with pytest.raises(tidesdb.TidesDBError):
            tidesdb.load_config_from_ini(ini_path, "my_cf")

    def test_load_preserves_all_fields(self, temp_db_path):
        """Test that every field overridden here survives a save/load roundtrip."""
        original = tidesdb.default_column_family_config()
        original.level_size_ratio = 8
        original.dividing_level_offset = 3
        original.klog_value_threshold = 1024
        original.index_sample_ratio = 2
        original.block_index_prefix_len = 32
        original.sync_interval_us = 500000
        original.skip_list_max_level = 16
        original.skip_list_probability = 0.5
        original.min_disk_space = 200 * 1024 * 1024
        original.l1_file_count_trigger = 8
        original.l0_queue_stall_threshold = 30

        ini_path = os.path.join(temp_db_path, "full_config.ini")
        tidesdb.save_config_to_ini(ini_path, "full_cf", original)

        loaded = tidesdb.load_config_from_ini(ini_path, "full_cf")

        assert loaded.level_size_ratio == original.level_size_ratio
        assert loaded.dividing_level_offset == original.dividing_level_offset
        assert loaded.klog_value_threshold == original.klog_value_threshold
        assert loaded.index_sample_ratio == original.index_sample_ratio
        assert loaded.block_index_prefix_len == original.block_index_prefix_len
        assert loaded.sync_interval_us == original.sync_interval_us
        assert loaded.skip_list_max_level == original.skip_list_max_level
        # skip_list_probability was overridden above but previously never
        # verified; it is a float, so compare with a tolerance.
        assert abs(loaded.skip_list_probability - original.skip_list_probability) < 1e-6
        assert loaded.min_disk_space == original.min_disk_space
        assert loaded.l1_file_count_trigger == original.l1_file_count_trigger
        assert loaded.l0_queue_stall_threshold == original.l0_queue_stall_threshold


# Direct execution: hand this file to pytest with verbose reporting.
if __name__ == "__main__":
    verbose_args = [__file__, "-v"]
    pytest.main(verbose_args)
Loading