Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions crates/bench/benches/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ use foldhash::{HashSet, HashSetExt};
use hashbrown::{hash_map::Entry, HashMap};
use itertools::Itertools as _;
use spacetimedb_sats::layout::Size;
use spacetimedb_table::indexes::{PageIndex, PageOffset, RowPointer, SquashedOffset};
use spacetimedb_table::table_index::unique_direct_index::UniqueDirectIndex;
use spacetimedb_table::table_index::uniquemap::UniqueMap;
use spacetimedb_table::table_index::Index as _;
use spacetimedb_table::{
indexes::{PageIndex, PageOffset, RowPointer, SquashedOffset},
table_index::RangedIndex,
};

fn time<R>(body: impl FnOnce() -> R) -> Duration {
let start = WallTime.start();
Expand Down Expand Up @@ -173,20 +177,20 @@ trait Index: Clone {
}

#[derive(Clone)]
struct IBTree(UniqueMap<K, RowPointer>);
struct IBTree(UniqueMap<K>);
impl Index for IBTree {
const NAME: &'static str = "IBTree";
fn new() -> Self {
Self(<_>::default())
}
fn insert(&mut self, key: K, val: RowPointer) -> Result<(), RowPointer> {
self.0.insert(key, val).map_err(|x| *x)
self.0.insert(key, val)
}
fn seek(&self, key: K) -> impl Iterator<Item = RowPointer> {
self.0.values_in_range(&(key..=key)).copied()
self.0.seek_range(&(key..=key))
}
fn delete(&mut self, key: K) -> bool {
self.0.delete(&key)
self.0.delete(&key, RowPointer(0))
}
}

Expand Down Expand Up @@ -249,10 +253,10 @@ impl Index for IDirectIndex {
self.0.insert(key as usize, val)
}
fn seek(&self, key: K) -> impl Iterator<Item = RowPointer> {
self.0.seek_point(key as usize)
self.0.seek_point(&(key as usize))
}
fn delete(&mut self, key: K) -> bool {
self.0.delete(key as usize)
self.0.delete(&(key as usize), RowPointer(0))
}
}

Expand Down
7 changes: 7 additions & 0 deletions crates/table/proptest-regressions/table_index/mod.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 3276d3db4a1a70d78db9a6a01eaa3bba810a2317e9c67e4d5d8d93cbba472c99 # shrinks to ((ty, cols, pv), is_unique) = ((ProductType {None: Bool}, [ColId(0)], ProductValue { elements: [Bool(false)] }), false)
121 changes: 121 additions & 0 deletions crates/table/src/table_index/index.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
use crate::{indexes::RowPointer, table_index::KeySize};
use core::{mem, ops::RangeBounds};

/// Common interface of the table indexes in this module:
/// a mapping from keys of type [`Index::Key`] to [`RowPointer`]s.
///
/// The provided methods assume a unique index over fixed-size keys;
/// implementations for which that does not hold should override them.
pub trait Index {
    /// The type of keys indexed.
    type Key: KeySize;

    // =========================================================================
    // Construction
    // =========================================================================

    /// Clones the structure of this index but not the indexed elements,
    /// returning an empty index.
    fn clone_structure(&self) -> Self;

    // =========================================================================
    // Mutation
    // =========================================================================

    /// Inserts the relation `key -> ptr` into this index.
    ///
    /// If `key` was already present in the index,
    /// does not add an association with `ptr`.
    /// Returns the existing associated pointer as the error instead.
    fn insert(&mut self, key: Self::Key, ptr: RowPointer) -> Result<(), RowPointer>;

    /// Deletes `key -> ptr` from this index.
    ///
    /// Returns whether `key -> ptr` was present.
    ///
    /// Implementations are free to ignore `ptr`
    /// if there can only ever be one `key`,
    /// as is the case for unique indices.
    fn delete(&mut self, key: &Self::Key, ptr: RowPointer) -> bool;

    /// Clears all the rows and keys from the index,
    /// leaving it empty.
    fn clear(&mut self);

    // =========================================================================
    // Querying
    // =========================================================================

    /// Checks whether `other` can be merged into `self`.
    ///
    /// Returns `Ok(())` if so,
    /// or otherwise an error containing the element in `self` that caused the violation.
    ///
    /// The closure `ignore` indicates whether a row in `self` should be ignored.
    fn can_merge(&self, other: &Self, ignore: impl Fn(&RowPointer) -> bool) -> Result<(), RowPointer>;

    /// Returns the number of keys indexed.
    ///
    /// This method runs in constant time.
    fn num_keys(&self) -> usize;

    /// The number of bytes stored in keys in this index.
    ///
    /// For non-unique indexes, duplicate keys are counted once for each row that refers to them,
    /// even though the internal storage may deduplicate them as an optimization.
    ///
    /// This method runs in constant time.
    ///
    /// See the [`KeySize`](super::KeySize) trait for more details on how this method computes its result.
    ///
    /// The provided implementation assumes
    /// that the key takes up exactly `size_of::<Self::Key>()` bytes
    /// and has no dynamic component.
    /// If that is not correct, you should override the implementation.
    fn num_key_bytes(&self) -> u64 {
        // Widen both factors before multiplying so that the product is computed in `u64`
        // rather than in `usize`, which could overflow on targets with a narrow `usize`.
        self.num_keys() as u64 * mem::size_of::<Self::Key>() as u64
    }

    /// Returns the number of rows indexed.
    ///
    /// When `self.num_keys() == 0` then `self.num_rows() == 0`.
    ///
    /// Note that, for non-unique indexes, this may be larger than [`Index::num_keys`].
    ///
    /// This method runs in constant time.
    ///
    /// The provided implementation assumes the index is unique
    /// and uses [`Index::num_keys`].
    fn num_rows(&self) -> usize {
        self.num_keys()
    }

    /// Returns whether the index has no keys or rows.
    ///
    /// When `self.is_empty()`
    /// then `self.num_keys() == 0` and `self.num_rows() == 0`.
    ///
    /// The provided implementation uses [`Index::num_keys`].
    fn is_empty(&self) -> bool {
        self.num_keys() == 0
    }

    /// The type of iterator returned by [`Index::seek_point`].
    type PointIter<'a>: Iterator<Item = RowPointer>
    where
        Self: 'a;

    /// Seeks `point` in this index,
    /// returning an iterator over all the elements.
    ///
    /// If the index is unique, this will at most return one element.
    fn seek_point(&self, point: &Self::Key) -> Self::PointIter<'_>;
}

/// An [`Index`] that additionally supports seeking a range of keys.
pub trait RangedIndex: Index {
/// The type of iterator returned by [`RangedIndex::seek_range`].
type RangeIter<'a>: Iterator<Item = RowPointer>
where
Self: 'a;

/// Seeks the `range` in this index,
/// returning an iterator over all the elements.
///
/// Prefer [`Index::seek_point`] for point scans
/// rather than providing a point `range`
/// as it will be faster.
fn seek_range(&self, range: &impl RangeBounds<Self::Key>) -> Self::RangeIter<'_>;
}
53 changes: 53 additions & 0 deletions crates/table/src/table_index/key_size.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,49 @@
use super::Index;
use core::mem;
use spacetimedb_memory_usage::MemoryUsage;
use spacetimedb_sats::{
algebraic_value::Packed, i256, u256, AlgebraicValue, ArrayValue, ProductValue, SumValue, F32, F64,
};

/// Storage for memoizing `KeySize` statistics.
///
/// The type parameter `I` on each method names the index whose keys are being tracked,
/// so that the key type is available as `I::Key`.
pub trait KeyBytesStorage: Default + MemoryUsage {
/// Add `key.key_size_in_bytes()` to the statistics.
fn add_to_key_bytes<I: Index>(&mut self, key: &I::Key);

/// Subtract `key.key_size_in_bytes()` from the statistics.
fn sub_from_key_bytes<I: Index>(&mut self, key: &I::Key);

/// Resets the statistics to zero.
fn reset_to_zero(&mut self);

/// Returns the number of bytes taken up by the keys of the index.
///
/// `index` is passed in so that implementations which store nothing
/// can derive the answer from the index itself.
fn get<I: Index>(&self, index: &I) -> u64;
}

/// Stateless storage: nothing is memoized,
/// and the key bytes are derived on demand from the number of keys in the index.
impl KeyBytesStorage for () {
    /// No-op, as there is no state to update.
    fn add_to_key_bytes<I: Index>(&mut self, _key: &I::Key) {}

    /// No-op, as there is no state to update.
    fn sub_from_key_bytes<I: Index>(&mut self, _key: &I::Key) {}

    /// No-op, as there is no state to reset.
    fn reset_to_zero(&mut self) {}

    /// Computes the key bytes as `index.num_keys()` times `size_of::<I::Key>()`.
    fn get<I: Index>(&self, index: &I) -> u64 {
        let key_count = index.num_keys() as u64;
        let bytes_per_key = mem::size_of::<I::Key>() as u64;
        key_count * bytes_per_key
    }
}

/// Memoizing storage: the `u64` itself is the running total of key bytes.
impl KeyBytesStorage for u64 {
    /// Grows the running total by the size of `key`.
    fn add_to_key_bytes<I: Index>(&mut self, key: &I::Key) {
        let added = key.key_size_in_bytes() as u64;
        *self += added;
    }

    /// Shrinks the running total by the size of `key`.
    fn sub_from_key_bytes<I: Index>(&mut self, key: &I::Key) {
        let removed = key.key_size_in_bytes() as u64;
        *self -= removed;
    }

    /// Sets the running total back to zero.
    fn reset_to_zero(&mut self) {
        *self = Self::default();
    }

    /// Returns the running total; the index itself is not consulted.
    fn get<I: Index>(&self, _index: &I) -> u64 {
        *self
    }
}

/// Index keys whose memory usage we can measure and report.
///
/// The reported memory usage of an index is based on:
Expand All @@ -25,13 +67,16 @@ use spacetimedb_sats::{
/// - Array values take bytes equal to the sum of their elements' bytes.
/// As with strings, no overhead is counted.
pub trait KeySize {
/// The [`KeyBytesStorage`] used to memoize key-size statistics
/// for keys of this type.
type MemoStorage: KeyBytesStorage;

/// Returns the number of bytes this key counts for
/// in the reported memory usage.
fn key_size_in_bytes(&self) -> usize;
}

macro_rules! impl_key_size_primitive {
($prim:ty) => {
impl KeySize for $prim {
fn key_size_in_bytes(&self) -> usize { std::mem::size_of::<Self>() }
type MemoStorage = ();
}
};
($($prim:ty,)*) => {
Expand Down Expand Up @@ -61,12 +106,14 @@ impl_key_size_primitive!(
);

impl KeySize for Box<str> {
    /// String keys use a `u64` as their memo storage.
    type MemoStorage = u64;

    /// Returns the length of the string in bytes.
    fn key_size_in_bytes(&self) -> usize {
        str::len(self)
    }
}

impl KeySize for AlgebraicValue {
type MemoStorage = u64;
fn key_size_in_bytes(&self) -> usize {
match self {
AlgebraicValue::Bool(x) => x.key_size_in_bytes(),
Expand Down Expand Up @@ -95,12 +142,14 @@ impl KeySize for AlgebraicValue {
}

impl KeySize for SumValue {
    /// Sum-value keys use a `u64` as their memo storage.
    type MemoStorage = u64;

    /// Returns the size of the payload in `self.value` plus one byte.
    ///
    /// NOTE(review): the extra byte presumably accounts for the variant tag;
    /// confirm against the `KeySize` trait documentation.
    fn key_size_in_bytes(&self) -> usize {
        let payload_bytes = self.value.key_size_in_bytes();
        payload_bytes + 1
    }
}

impl KeySize for ProductValue {
type MemoStorage = u64;
fn key_size_in_bytes(&self) -> usize {
self.elements.key_size_in_bytes()
}
Expand All @@ -110,6 +159,8 @@ impl<K> KeySize for [K]
where
K: KeySize,
{
type MemoStorage = u64;

// TODO(perf, bikeshedding): check that this is optimized to `size_of::<K>() * self.len()`
// when `K` is a primitive.
fn key_size_in_bytes(&self) -> usize {
Expand All @@ -118,6 +169,8 @@ where
}

impl KeySize for ArrayValue {
type MemoStorage = u64;

fn key_size_in_bytes(&self) -> usize {
match self {
ArrayValue::Sum(elts) => elts.key_size_in_bytes(),
Expand Down
Loading
Loading