From c98532dddbf03b55a0426a2c4a322581bd981a2e Mon Sep 17 00:00:00 2001 From: Mazdak Farrokhzad Date: Fri, 12 Dec 2025 14:28:50 +0100 Subject: [PATCH] provide a common `Index` trait & simplify and shrink `TableIndex` with it --- crates/bench/benches/index.rs | 18 +- .../proptest-regressions/table_index/mod.txt | 7 + crates/table/src/table_index/index.rs | 121 +++ crates/table/src/table_index/key_size.rs | 53 + crates/table/src/table_index/mod.rs | 964 ++++++++---------- crates/table/src/table_index/multimap.rs | 134 ++- .../table/src/table_index/same_key_entry.rs | 52 +- .../unique_direct_fixed_cap_index.rs | 119 ++- .../src/table_index/unique_direct_index.rs | 127 ++- crates/table/src/table_index/uniquemap.rs | 132 +-- 10 files changed, 918 insertions(+), 809 deletions(-) create mode 100644 crates/table/proptest-regressions/table_index/mod.txt create mode 100644 crates/table/src/table_index/index.rs diff --git a/crates/bench/benches/index.rs b/crates/bench/benches/index.rs index 3cd9467d449..b8f74c5564e 100644 --- a/crates/bench/benches/index.rs +++ b/crates/bench/benches/index.rs @@ -8,9 +8,13 @@ use foldhash::{HashSet, HashSetExt}; use hashbrown::{hash_map::Entry, HashMap}; use itertools::Itertools as _; use spacetimedb_sats::layout::Size; -use spacetimedb_table::indexes::{PageIndex, PageOffset, RowPointer, SquashedOffset}; use spacetimedb_table::table_index::unique_direct_index::UniqueDirectIndex; use spacetimedb_table::table_index::uniquemap::UniqueMap; +use spacetimedb_table::table_index::Index as _; +use spacetimedb_table::{ + indexes::{PageIndex, PageOffset, RowPointer, SquashedOffset}, + table_index::RangedIndex, +}; fn time(body: impl FnOnce() -> R) -> Duration { let start = WallTime.start(); @@ -173,20 +177,20 @@ trait Index: Clone { } #[derive(Clone)] -struct IBTree(UniqueMap); +struct IBTree(UniqueMap); impl Index for IBTree { const NAME: &'static str = "IBTree"; fn new() -> Self { Self(<_>::default()) } fn insert(&mut self, key: K, val: RowPointer) -> 
Result<(), RowPointer> { - self.0.insert(key, val).map_err(|x| *x) + self.0.insert(key, val) } fn seek(&self, key: K) -> impl Iterator { - self.0.values_in_range(&(key..=key)).copied() + self.0.seek_range(&(key..=key)) } fn delete(&mut self, key: K) -> bool { - self.0.delete(&key) + self.0.delete(&key, RowPointer(0)) } } @@ -249,10 +253,10 @@ impl Index for IDirectIndex { self.0.insert(key as usize, val) } fn seek(&self, key: K) -> impl Iterator { - self.0.seek_point(key as usize) + self.0.seek_point(&(key as usize)) } fn delete(&mut self, key: K) -> bool { - self.0.delete(key as usize) + self.0.delete(&(key as usize), RowPointer(0)) } } diff --git a/crates/table/proptest-regressions/table_index/mod.txt b/crates/table/proptest-regressions/table_index/mod.txt new file mode 100644 index 00000000000..d66eff4a279 --- /dev/null +++ b/crates/table/proptest-regressions/table_index/mod.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 3276d3db4a1a70d78db9a6a01eaa3bba810a2317e9c67e4d5d8d93cbba472c99 # shrinks to ((ty, cols, pv), is_unique) = ((ProductType {None: Bool}, [ColId(0)], ProductValue { elements: [Bool(false)] }), false) diff --git a/crates/table/src/table_index/index.rs b/crates/table/src/table_index/index.rs new file mode 100644 index 00000000000..d7035a04e77 --- /dev/null +++ b/crates/table/src/table_index/index.rs @@ -0,0 +1,121 @@ +use crate::{indexes::RowPointer, table_index::KeySize}; +use core::{mem, ops::RangeBounds}; + +pub trait Index { + /// The type of keys indexed. 
+ type Key: KeySize; + + // ========================================================================= + // Construction + // ========================================================================= + + /// Clones the structure of this index but not the indexed elements, + /// returning an empty index. + fn clone_structure(&self) -> Self; + + // ========================================================================= + // Mutation + // ========================================================================= + + /// Inserts the relation `key -> ptr` to this map. + /// + /// If `key` was already present in the index, + /// does not add an association with `ptr`. + /// Returns the existing associated pointer instead. + fn insert(&mut self, key: Self::Key, ptr: RowPointer) -> Result<(), RowPointer>; + + /// Deletes `key -> ptr` from this index. + /// + /// Returns whether `key -> ptr` was present. + /// + /// Implementations are free to ignore `ptr` + /// if there can only ever be one `key`, + /// as is the case for unique indices. + fn delete(&mut self, key: &Self::Key, ptr: RowPointer) -> bool; + + /// Clears all the rows and keys from the index, + /// leaving it empty. + fn clear(&mut self); + + // ========================================================================= + // Querying + // ========================================================================= + + /// Returns whether `other` can be merged into `self` + /// with an error containing the element in `self` that caused the violation. + /// + /// The closure `ignore` indicates whether a row in `self` should be ignored. + fn can_merge(&self, other: &Self, ignore: impl Fn(&RowPointer) -> bool) -> Result<(), RowPointer>; + + /// Returns the number of keys indexed. + /// + /// This method runs in constant time. + fn num_keys(&self) -> usize; + + /// The number of bytes stored in keys in this index. 
+ /// + /// For non-unique indexes, duplicate keys are counted once for each row that refers to them, + /// even though the internal storage may deduplicate them as an optimization. + /// + /// This method runs in constant time. + /// + /// See the [`KeySize`](super::KeySize) trait for more details on how this method computes its result. + /// + /// The provided implementation assumes + /// that the key takes up exactly `size_of::()` bytes + /// and has no dynamic component. + /// If that is not correct, you should override the implementation. + fn num_key_bytes(&self) -> u64 { + (self.num_keys() * mem::size_of::()) as u64 + } + + /// Returns the number of rows indexed. + /// + /// When `self.num_keys() == 0` then `self.num_rows() == 0`. + /// + /// Note that, for non-unique indexes, this may be larger than [`Index::num_keys`]. + /// + /// This method runs in constant time. + /// + /// The provided implementation assumes the index is unique + /// and uses [`Index::num_keys`]. + fn num_rows(&self) -> usize { + self.num_keys() + } + + /// Returns whether the index has no keys or rows. + /// + /// When `self.is_empty()` + /// then `self.num_keys() == 0` and `self.num_rows() == 0`. + /// + /// The provided implementation uses [`Index::num_keys`]. + fn is_empty(&self) -> bool { + self.num_keys() == 0 + } + + /// The type of iterator returned by [`Index::seek_point`]. + type PointIter<'a>: Iterator + where + Self: 'a; + + /// Seeks `point` in this index, + /// returning an iterator over all the elements. + /// + /// If the index is unique, this will at most return one element. + fn seek_point(&self, point: &Self::Key) -> Self::PointIter<'_>; +} + +pub trait RangedIndex: Index { + /// The type of iterator returned by [`RangedIndex::seek_range`]. + type RangeIter<'a>: Iterator + where + Self: 'a; + + /// Seeks the `range` in this index, + /// returning an iterator over all the elements. 
+ /// + /// Prefer [`Index::seek_point`] for point scans + /// rather than providing a point `range` + /// as it will be faster. + fn seek_range(&self, range: &impl RangeBounds) -> Self::RangeIter<'_>; +} diff --git a/crates/table/src/table_index/key_size.rs b/crates/table/src/table_index/key_size.rs index 06cdf1501ea..da5ec7b1078 100644 --- a/crates/table/src/table_index/key_size.rs +++ b/crates/table/src/table_index/key_size.rs @@ -1,7 +1,49 @@ +use super::Index; +use core::mem; +use spacetimedb_memory_usage::MemoryUsage; use spacetimedb_sats::{ algebraic_value::Packed, i256, u256, AlgebraicValue, ArrayValue, ProductValue, SumValue, F32, F64, }; +/// Storage for memoizing `KeySize` statistics. +pub trait KeyBytesStorage: Default + MemoryUsage { + /// Add `key.key_size_in_bytes()` to the statistics. + fn add_to_key_bytes(&mut self, key: &I::Key); + + /// Subtract `key.key_size_in_bytes()` from the statistics. + fn sub_from_key_bytes(&mut self, key: &I::Key); + + /// Resets the statistics to zero. + fn reset_to_zero(&mut self); + + /// Returns the number of bytes taken up by the keys of the index. + fn get(&self, index: &I) -> u64; +} + +impl KeyBytesStorage for () { + fn add_to_key_bytes(&mut self, _: &I::Key) {} + fn sub_from_key_bytes(&mut self, _: &I::Key) {} + fn reset_to_zero(&mut self) {} + fn get(&self, index: &I) -> u64 { + index.num_keys() as u64 * mem::size_of::() as u64 + } +} + +impl KeyBytesStorage for u64 { + fn add_to_key_bytes(&mut self, key: &I::Key) { + *self += key.key_size_in_bytes() as u64; + } + fn sub_from_key_bytes(&mut self, key: &I::Key) { + *self -= key.key_size_in_bytes() as u64; + } + fn reset_to_zero(&mut self) { + *self = 0; + } + fn get(&self, _: &I) -> u64 { + *self + } +} + /// Index keys whose memory usage we can measure and report. /// /// The reported memory usage of an index is based on: @@ -25,6 +67,8 @@ use spacetimedb_sats::{ /// - Array values take bytes equal to the sum of their elements' bytes. 
/// As with strings, no overhead is counted. pub trait KeySize { + type MemoStorage: KeyBytesStorage; + fn key_size_in_bytes(&self) -> usize; } @@ -32,6 +76,7 @@ macro_rules! impl_key_size_primitive { ($prim:ty) => { impl KeySize for $prim { fn key_size_in_bytes(&self) -> usize { std::mem::size_of::() } + type MemoStorage = (); } }; ($($prim:ty,)*) => { @@ -61,12 +106,14 @@ impl_key_size_primitive!( ); impl KeySize for Box { + type MemoStorage = u64; fn key_size_in_bytes(&self) -> usize { self.len() } } impl KeySize for AlgebraicValue { + type MemoStorage = u64; fn key_size_in_bytes(&self) -> usize { match self { AlgebraicValue::Bool(x) => x.key_size_in_bytes(), @@ -95,12 +142,14 @@ impl KeySize for AlgebraicValue { } impl KeySize for SumValue { + type MemoStorage = u64; fn key_size_in_bytes(&self) -> usize { 1 + self.value.key_size_in_bytes() } } impl KeySize for ProductValue { + type MemoStorage = u64; fn key_size_in_bytes(&self) -> usize { self.elements.key_size_in_bytes() } @@ -110,6 +159,8 @@ impl KeySize for [K] where K: KeySize, { + type MemoStorage = u64; + // TODO(perf, bikeshedding): check that this optimized to `size_of::() * self.len()` // when `K` is a primitive. fn key_size_in_bytes(&self) -> usize { @@ -118,6 +169,8 @@ where } impl KeySize for ArrayValue { + type MemoStorage = u64; + fn key_size_in_bytes(&self) -> usize { match self { ArrayValue::Sum(elts) => elts.key_size_in_bytes(), diff --git a/crates/table/src/table_index/mod.rs b/crates/table/src/table_index/mod.rs index ae6b8c12ab5..39c2847655c 100644 --- a/crates/table/src/table_index/mod.rs +++ b/crates/table/src/table_index/mod.rs @@ -25,6 +25,8 @@ /// Additionally, beyond our btree indices, /// we support direct unique indices, where key are indices into `Vec`s. 
use self::same_key_entry::SameKeyEntryIter; +use self::unique_direct_fixed_cap_index::{UniqueDirectFixedCapIndex, UniqueDirectFixedCapIndexRangeIter}; +use self::unique_direct_index::{UniqueDirectIndex, UniqueDirectIndexPointIter, UniqueDirectIndexRangeIter}; use super::indexes::RowPointer; use super::table::RowRef; use crate::{read_column::ReadColumn, static_assert_size}; @@ -35,7 +37,9 @@ use spacetimedb_sats::{ algebraic_value::Packed, i256, product_value::InvalidFieldError, sum_value::SumTag, u256, AlgebraicType, AlgebraicValue, ProductType, F32, F64, }; +use spacetimedb_schema::def::IndexAlgorithm; +mod index; mod key_size; mod multimap; mod same_key_entry; @@ -43,17 +47,15 @@ pub mod unique_direct_fixed_cap_index; pub mod unique_direct_index; pub mod uniquemap; -pub use key_size::KeySize; -use spacetimedb_schema::def::IndexAlgorithm; -use unique_direct_fixed_cap_index::{UniqueDirectFixedCapIndex, UniqueDirectFixedCapIndexRangeIter}; -use unique_direct_index::{UniqueDirectIndex, UniqueDirectIndexPointIter, UniqueDirectIndexRangeIter}; +pub use self::index::{Index, RangedIndex}; +pub use self::key_size::KeySize; -type BtreeIndex = multimap::MultiMap; -type BtreeIndexPointIter<'a> = SameKeyEntryIter<'a, RowPointer>; -type BtreeIndexRangeIter<'a, K> = multimap::MultiMapRangeIter<'a, K, RowPointer>; -type BtreeUniqueIndex = uniquemap::UniqueMap; -type BtreeUniqueIndexPointIter<'a> = uniquemap::UniqueMapPointIter<'a, RowPointer>; -type BtreeUniqueIndexRangeIter<'a, K> = uniquemap::UniqueMapRangeIter<'a, K, RowPointer>; +type BtreeIndex = multimap::MultiMap; +type BtreeIndexPointIter<'a> = SameKeyEntryIter<'a>; +type BtreeIndexRangeIter<'a, K> = multimap::MultiMapRangeIter<'a, K>; +type BtreeUniqueIndex = uniquemap::UniqueMap; +type BtreeUniqueIndexPointIter<'a> = uniquemap::UniqueMapPointIter<'a>; +type BtreeUniqueIndexRangeIter<'a, K> = uniquemap::UniqueMapRangeIter<'a, K>; /// A point iterator over a [`TypedIndex`], with a specialized key type. 
/// @@ -68,8 +70,8 @@ impl Iterator for TypedIndexPointIter<'_> { type Item = RowPointer; fn next(&mut self) -> Option { match self { - Self::BTree(this) => this.next().copied(), - Self::UniqueBTree(this) => this.next().copied(), + Self::BTree(this) => this.next(), + Self::UniqueBTree(this) => this.next(), Self::UniqueDirect(this) => this.next(), } } @@ -144,41 +146,41 @@ impl Iterator for TypedIndexRangeIter<'_> { match self { Self::RangeEmpty => None, - Self::BtreeBool(this) => this.next().copied(), - Self::BtreeU8(this) => this.next().copied(), - Self::BtreeI8(this) => this.next().copied(), - Self::BtreeU16(this) => this.next().copied(), - Self::BtreeI16(this) => this.next().copied(), - Self::BtreeU32(this) => this.next().copied(), - Self::BtreeI32(this) => this.next().copied(), - Self::BtreeU64(this) => this.next().copied(), - Self::BtreeI64(this) => this.next().copied(), - Self::BtreeU128(this) => this.next().copied(), - Self::BtreeI128(this) => this.next().copied(), - Self::BtreeU256(this) => this.next().copied(), - Self::BtreeI256(this) => this.next().copied(), - Self::BtreeF32(this) => this.next().copied(), - Self::BtreeF64(this) => this.next().copied(), - Self::BtreeString(this) => this.next().copied(), - Self::BtreeAV(this) => this.next().copied(), - - Self::UniqueBtreeBool(this) => this.next().copied(), - Self::UniqueBtreeU8(this) => this.next().copied(), - Self::UniqueBtreeI8(this) => this.next().copied(), - Self::UniqueBtreeU16(this) => this.next().copied(), - Self::UniqueBtreeI16(this) => this.next().copied(), - Self::UniqueBtreeU32(this) => this.next().copied(), - Self::UniqueBtreeI32(this) => this.next().copied(), - Self::UniqueBtreeU64(this) => this.next().copied(), - Self::UniqueBtreeI64(this) => this.next().copied(), - Self::UniqueBtreeU128(this) => this.next().copied(), - Self::UniqueBtreeI128(this) => this.next().copied(), - Self::UniqueBtreeU256(this) => this.next().copied(), - Self::UniqueBtreeI256(this) => this.next().copied(), - 
Self::UniqueBtreeF32(this) => this.next().copied(), - Self::UniqueBtreeF64(this) => this.next().copied(), - Self::UniqueBtreeString(this) => this.next().copied(), - Self::UniqueBtreeAV(this) => this.next().copied(), + Self::BtreeBool(this) => this.next(), + Self::BtreeU8(this) => this.next(), + Self::BtreeI8(this) => this.next(), + Self::BtreeU16(this) => this.next(), + Self::BtreeI16(this) => this.next(), + Self::BtreeU32(this) => this.next(), + Self::BtreeI32(this) => this.next(), + Self::BtreeU64(this) => this.next(), + Self::BtreeI64(this) => this.next(), + Self::BtreeU128(this) => this.next(), + Self::BtreeI128(this) => this.next(), + Self::BtreeU256(this) => this.next(), + Self::BtreeI256(this) => this.next(), + Self::BtreeF32(this) => this.next(), + Self::BtreeF64(this) => this.next(), + Self::BtreeString(this) => this.next(), + Self::BtreeAV(this) => this.next(), + + Self::UniqueBtreeBool(this) => this.next(), + Self::UniqueBtreeU8(this) => this.next(), + Self::UniqueBtreeI8(this) => this.next(), + Self::UniqueBtreeU16(this) => this.next(), + Self::UniqueBtreeI16(this) => this.next(), + Self::UniqueBtreeU32(this) => this.next(), + Self::UniqueBtreeI32(this) => this.next(), + Self::UniqueBtreeU64(this) => this.next(), + Self::UniqueBtreeI64(this) => this.next(), + Self::UniqueBtreeU128(this) => this.next(), + Self::UniqueBtreeI128(this) => this.next(), + Self::UniqueBtreeU256(this) => this.next(), + Self::UniqueBtreeI256(this) => this.next(), + Self::UniqueBtreeF32(this) => this.next(), + Self::UniqueBtreeF64(this) => this.next(), + Self::UniqueBtreeString(this) => this.next(), + Self::UniqueBtreeAV(this) => this.next(), Self::UniqueDirect(this) => this.next(), Self::UniqueDirectU8(this) => this.next(), @@ -389,47 +391,47 @@ impl TypedIndex { fn clone_structure(&self) -> Self { use TypedIndex::*; match self { - BtreeBool(_) => BtreeBool(<_>::default()), - BtreeU8(_) => BtreeU8(<_>::default()), - BtreeSumTag(_) => BtreeSumTag(<_>::default()), - BtreeI8(_) => 
BtreeI8(<_>::default()), - BtreeU16(_) => BtreeU16(<_>::default()), - BtreeI16(_) => BtreeI16(<_>::default()), - BtreeU32(_) => BtreeU32(<_>::default()), - BtreeI32(_) => BtreeI32(<_>::default()), - BtreeU64(_) => BtreeU64(<_>::default()), - BtreeI64(_) => BtreeI64(<_>::default()), - BtreeU128(_) => BtreeU128(<_>::default()), - BtreeI128(_) => BtreeI128(<_>::default()), - BtreeU256(_) => BtreeU256(<_>::default()), - BtreeI256(_) => BtreeI256(<_>::default()), - BtreeF32(_) => BtreeF32(<_>::default()), - BtreeF64(_) => BtreeF64(<_>::default()), - BtreeString(_) => BtreeString(<_>::default()), - BtreeAV(_) => BtreeAV(<_>::default()), - UniqueBtreeBool(_) => UniqueBtreeBool(<_>::default()), - UniqueBtreeU8(_) => UniqueBtreeU8(<_>::default()), - UniqueBtreeSumTag(_) => UniqueBtreeSumTag(<_>::default()), - UniqueBtreeI8(_) => UniqueBtreeI8(<_>::default()), - UniqueBtreeU16(_) => UniqueBtreeU16(<_>::default()), - UniqueBtreeI16(_) => UniqueBtreeI16(<_>::default()), - UniqueBtreeU32(_) => UniqueBtreeU32(<_>::default()), - UniqueBtreeI32(_) => UniqueBtreeI32(<_>::default()), - UniqueBtreeU64(_) => UniqueBtreeU64(<_>::default()), - UniqueBtreeI64(_) => UniqueBtreeI64(<_>::default()), - UniqueBtreeU128(_) => UniqueBtreeU128(<_>::default()), - UniqueBtreeI128(_) => UniqueBtreeI128(<_>::default()), - UniqueBtreeU256(_) => UniqueBtreeU256(<_>::default()), - UniqueBtreeI256(_) => UniqueBtreeI256(<_>::default()), - UniqueBtreeF32(_) => UniqueBtreeF32(<_>::default()), - UniqueBtreeF64(_) => UniqueBtreeF64(<_>::default()), - UniqueBtreeString(_) => UniqueBtreeString(<_>::default()), - UniqueBtreeAV(_) => UniqueBtreeAV(<_>::default()), - UniqueDirectU8(_) => UniqueDirectU8(<_>::default()), - UniqueDirectSumTag(idx) => UniqueDirectSumTag(idx.clone_structure()), - UniqueDirectU16(_) => UniqueDirectU16(<_>::default()), - UniqueDirectU32(_) => UniqueDirectU32(<_>::default()), - UniqueDirectU64(_) => UniqueDirectU64(<_>::default()), + BtreeBool(i) => BtreeBool(i.clone_structure()), + 
BtreeU8(i) => BtreeU8(i.clone_structure()), + BtreeSumTag(i) => BtreeSumTag(i.clone_structure()), + BtreeI8(i) => BtreeI8(i.clone_structure()), + BtreeU16(i) => BtreeU16(i.clone_structure()), + BtreeI16(i) => BtreeI16(i.clone_structure()), + BtreeU32(i) => BtreeU32(i.clone_structure()), + BtreeI32(i) => BtreeI32(i.clone_structure()), + BtreeU64(i) => BtreeU64(i.clone_structure()), + BtreeI64(i) => BtreeI64(i.clone_structure()), + BtreeU128(i) => BtreeU128(i.clone_structure()), + BtreeI128(i) => BtreeI128(i.clone_structure()), + BtreeU256(i) => BtreeU256(i.clone_structure()), + BtreeI256(i) => BtreeI256(i.clone_structure()), + BtreeF32(i) => BtreeF32(i.clone_structure()), + BtreeF64(i) => BtreeF64(i.clone_structure()), + BtreeString(i) => BtreeString(i.clone_structure()), + BtreeAV(i) => BtreeAV(i.clone_structure()), + UniqueBtreeBool(i) => UniqueBtreeBool(i.clone_structure()), + UniqueBtreeU8(i) => UniqueBtreeU8(i.clone_structure()), + UniqueBtreeSumTag(i) => UniqueBtreeSumTag(i.clone_structure()), + UniqueBtreeI8(i) => UniqueBtreeI8(i.clone_structure()), + UniqueBtreeU16(i) => UniqueBtreeU16(i.clone_structure()), + UniqueBtreeI16(i) => UniqueBtreeI16(i.clone_structure()), + UniqueBtreeU32(i) => UniqueBtreeU32(i.clone_structure()), + UniqueBtreeI32(i) => UniqueBtreeI32(i.clone_structure()), + UniqueBtreeU64(i) => UniqueBtreeU64(i.clone_structure()), + UniqueBtreeI64(i) => UniqueBtreeI64(i.clone_structure()), + UniqueBtreeU128(i) => UniqueBtreeU128(i.clone_structure()), + UniqueBtreeI128(i) => UniqueBtreeI128(i.clone_structure()), + UniqueBtreeU256(i) => UniqueBtreeU256(i.clone_structure()), + UniqueBtreeI256(i) => UniqueBtreeI256(i.clone_structure()), + UniqueBtreeF32(i) => UniqueBtreeF32(i.clone_structure()), + UniqueBtreeF64(i) => UniqueBtreeF64(i.clone_structure()), + UniqueBtreeString(i) => UniqueBtreeString(i.clone_structure()), + UniqueBtreeAV(i) => UniqueBtreeAV(i.clone_structure()), + UniqueDirectU8(i) => UniqueDirectU8(i.clone_structure()), + 
UniqueDirectSumTag(i) => UniqueDirectSumTag(i.clone_structure()), + UniqueDirectU16(i) => UniqueDirectU16(i.clone_structure()), + UniqueDirectU32(i) => UniqueDirectU32(i.clone_structure()), + UniqueDirectU64(i) => UniqueDirectU64(i.clone_structure()), } } @@ -483,7 +485,7 @@ impl TypedIndex { /// /// 1. Caller promises that `cols` matches what was given at construction (`Self::new`). /// 2. Caller promises that the projection of `row_ref`'s type's equals the index's key type. - unsafe fn insert(&mut self, cols: &ColList, row_ref: RowRef<'_>) -> Result { + unsafe fn insert(&mut self, cols: &ColList, row_ref: RowRef<'_>) -> Result<(), RowPointer> { fn project_to_singleton_key(cols: &ColList, row_ref: RowRef<'_>) -> T { // Extract the column. let col_pos = cols.as_singleton(); @@ -512,113 +514,68 @@ impl TypedIndex { unsafe { T::unchecked_read_column(row_ref, col_layout) } } - fn mm_insert_at_type( - this: &mut BtreeIndex, - cols: &ColList, - row_ref: RowRef<'_>, - ) -> Result { - let key: T = project_to_singleton_key(cols, row_ref); - let key_size = key.key_size_in_bytes(); - this.insert(key, row_ref.pointer()); - Ok(key_size) - } - fn um_insert_at_type( - this: &mut BtreeUniqueIndex, - cols: &ColList, - row_ref: RowRef<'_>, - ) -> Result { - let key: T = project_to_singleton_key(cols, row_ref); - let key_size = key.key_size_in_bytes(); - this.insert(key, row_ref.pointer()) - .map_err(|ptr| *ptr) - .map(|_| key_size) - } - fn direct_insert_at_type( - this: &mut UniqueDirectIndex, - cols: &ColList, - row_ref: RowRef<'_>, - to_usize: impl FnOnce(T) -> usize, - ) -> Result { - let key: T = project_to_singleton_key(cols, row_ref); - let key = to_usize(key); - let key_size = key.key_size_in_bytes(); - this.insert(key, row_ref.pointer()).map(|_| key_size) - } - fn direct_u8_insert_at_type( - this: &mut UniqueDirectFixedCapIndex, + fn insert_at_type( + this: &mut I, cols: &ColList, row_ref: RowRef<'_>, - to_u8: impl FnOnce(T) -> usize, - ) -> Result { - let key: T = 
project_to_singleton_key(cols, row_ref); - let key = to_u8(key); - let key_size = key.key_size_in_bytes(); - this.insert(key, row_ref.pointer()).map(|_| key_size) + convert: impl FnOnce(T) -> I::Key, + ) -> Result<(), RowPointer> { + let key = project_to_singleton_key(cols, row_ref); + this.insert(convert(key), row_ref.pointer()) } + + use core::convert::identity as id; + match self { - Self::BtreeBool(idx) => mm_insert_at_type(idx, cols, row_ref), - Self::BtreeU8(idx) => mm_insert_at_type(idx, cols, row_ref), - Self::BtreeSumTag(idx) => { - let SumTag(key) = project_to_singleton_key(cols, row_ref); - let key_size = key.key_size_in_bytes(); - idx.insert(key, row_ref.pointer()); - Ok(key_size) - } - Self::BtreeI8(idx) => mm_insert_at_type(idx, cols, row_ref), - Self::BtreeU16(idx) => mm_insert_at_type(idx, cols, row_ref), - Self::BtreeI16(idx) => mm_insert_at_type(idx, cols, row_ref), - Self::BtreeU32(idx) => mm_insert_at_type(idx, cols, row_ref), - Self::BtreeI32(idx) => mm_insert_at_type(idx, cols, row_ref), - Self::BtreeU64(idx) => mm_insert_at_type(idx, cols, row_ref), - Self::BtreeI64(idx) => mm_insert_at_type(idx, cols, row_ref), - Self::BtreeU128(idx) => mm_insert_at_type(idx, cols, row_ref), - Self::BtreeI128(idx) => mm_insert_at_type(idx, cols, row_ref), - Self::BtreeU256(idx) => mm_insert_at_type(idx, cols, row_ref), - Self::BtreeI256(idx) => mm_insert_at_type(idx, cols, row_ref), - Self::BtreeF32(idx) => mm_insert_at_type(idx, cols, row_ref), - Self::BtreeF64(idx) => mm_insert_at_type(idx, cols, row_ref), - Self::BtreeString(idx) => mm_insert_at_type(idx, cols, row_ref), + Self::BtreeBool(idx) => insert_at_type(idx, cols, row_ref, id), + Self::BtreeU8(idx) => insert_at_type(idx, cols, row_ref, id), + Self::BtreeSumTag(idx) => insert_at_type(idx, cols, row_ref, |SumTag(k)| k), + Self::BtreeI8(idx) => insert_at_type(idx, cols, row_ref, id), + Self::BtreeU16(idx) => insert_at_type(idx, cols, row_ref, id), + Self::BtreeI16(idx) => insert_at_type(idx, cols, 
row_ref, id), + Self::BtreeU32(idx) => insert_at_type(idx, cols, row_ref, id), + Self::BtreeI32(idx) => insert_at_type(idx, cols, row_ref, id), + Self::BtreeU64(idx) => insert_at_type(idx, cols, row_ref, id), + Self::BtreeI64(idx) => insert_at_type(idx, cols, row_ref, id), + Self::BtreeU128(idx) => insert_at_type(idx, cols, row_ref, id), + Self::BtreeI128(idx) => insert_at_type(idx, cols, row_ref, id), + Self::BtreeU256(idx) => insert_at_type(idx, cols, row_ref, id), + Self::BtreeI256(idx) => insert_at_type(idx, cols, row_ref, id), + Self::BtreeF32(idx) => insert_at_type(idx, cols, row_ref, id), + Self::BtreeF64(idx) => insert_at_type(idx, cols, row_ref, id), + Self::BtreeString(idx) => insert_at_type(idx, cols, row_ref, id), Self::BtreeAV(this) => { // SAFETY: Caller promised that any `col` in `cols` is in-bounds of `row_ref`'s layout. let key = unsafe { row_ref.project_unchecked(cols) }; - let key_size = key.key_size_in_bytes(); - this.insert(key, row_ref.pointer()); - Ok(key_size) - } - Self::UniqueBtreeBool(idx) => um_insert_at_type(idx, cols, row_ref), - Self::UniqueBtreeU8(idx) => um_insert_at_type(idx, cols, row_ref), - Self::UniqueBtreeSumTag(idx) => { - let SumTag(key) = project_to_singleton_key(cols, row_ref); - let key_size = key.key_size_in_bytes(); - idx.insert(key, row_ref.pointer()).map_err(|ptr| *ptr).map(|_| key_size) + this.insert(key, row_ref.pointer()) } - Self::UniqueBtreeI8(idx) => um_insert_at_type(idx, cols, row_ref), - Self::UniqueBtreeU16(idx) => um_insert_at_type(idx, cols, row_ref), - Self::UniqueBtreeI16(idx) => um_insert_at_type(idx, cols, row_ref), - Self::UniqueBtreeU32(idx) => um_insert_at_type(idx, cols, row_ref), - Self::UniqueBtreeI32(idx) => um_insert_at_type(idx, cols, row_ref), - Self::UniqueBtreeU64(idx) => um_insert_at_type(idx, cols, row_ref), - Self::UniqueBtreeI64(idx) => um_insert_at_type(idx, cols, row_ref), - Self::UniqueBtreeU128(idx) => um_insert_at_type(idx, cols, row_ref), - Self::UniqueBtreeI128(idx) => 
um_insert_at_type(idx, cols, row_ref), - Self::UniqueBtreeU256(idx) => um_insert_at_type(idx, cols, row_ref), - Self::UniqueBtreeI256(idx) => um_insert_at_type(idx, cols, row_ref), - Self::UniqueBtreeF32(idx) => um_insert_at_type(idx, cols, row_ref), - Self::UniqueBtreeF64(idx) => um_insert_at_type(idx, cols, row_ref), - Self::UniqueBtreeString(idx) => um_insert_at_type(idx, cols, row_ref), + Self::UniqueBtreeBool(idx) => insert_at_type(idx, cols, row_ref, id), + Self::UniqueBtreeU8(idx) => insert_at_type(idx, cols, row_ref, id), + Self::UniqueBtreeSumTag(idx) => insert_at_type(idx, cols, row_ref, |SumTag(k)| k), + Self::UniqueBtreeI8(idx) => insert_at_type(idx, cols, row_ref, id), + Self::UniqueBtreeU16(idx) => insert_at_type(idx, cols, row_ref, id), + Self::UniqueBtreeI16(idx) => insert_at_type(idx, cols, row_ref, id), + Self::UniqueBtreeU32(idx) => insert_at_type(idx, cols, row_ref, id), + Self::UniqueBtreeI32(idx) => insert_at_type(idx, cols, row_ref, id), + Self::UniqueBtreeU64(idx) => insert_at_type(idx, cols, row_ref, id), + Self::UniqueBtreeI64(idx) => insert_at_type(idx, cols, row_ref, id), + Self::UniqueBtreeU128(idx) => insert_at_type(idx, cols, row_ref, id), + Self::UniqueBtreeI128(idx) => insert_at_type(idx, cols, row_ref, id), + Self::UniqueBtreeU256(idx) => insert_at_type(idx, cols, row_ref, id), + Self::UniqueBtreeI256(idx) => insert_at_type(idx, cols, row_ref, id), + Self::UniqueBtreeF32(idx) => insert_at_type(idx, cols, row_ref, id), + Self::UniqueBtreeF64(idx) => insert_at_type(idx, cols, row_ref, id), + Self::UniqueBtreeString(idx) => insert_at_type(idx, cols, row_ref, id), Self::UniqueBtreeAV(this) => { // SAFETY: Caller promised that any `col` in `cols` is in-bounds of `row_ref`'s layout. 
let key = unsafe { row_ref.project_unchecked(cols) }; - let key_size = key.key_size_in_bytes(); this.insert(key, row_ref.pointer()) - .map_err(|ptr| *ptr) - .map(|_| key_size) } - Self::UniqueDirectSumTag(idx) => direct_u8_insert_at_type(idx, cols, row_ref, |SumTag(tag)| tag as usize), - Self::UniqueDirectU8(idx) => direct_insert_at_type(idx, cols, row_ref, |k: u8| k as usize), - Self::UniqueDirectU16(idx) => direct_insert_at_type(idx, cols, row_ref, |k: u16| k as usize), - Self::UniqueDirectU32(idx) => direct_insert_at_type(idx, cols, row_ref, |k: u32| k as usize), - Self::UniqueDirectU64(idx) => direct_insert_at_type(idx, cols, row_ref, |k: u64| k as usize), + Self::UniqueDirectSumTag(idx) => insert_at_type(idx, cols, row_ref, |SumTag(tag)| tag), + Self::UniqueDirectU8(idx) => insert_at_type(idx, cols, row_ref, |k: u8| k as usize), + Self::UniqueDirectU16(idx) => insert_at_type(idx, cols, row_ref, |k: u16| k as usize), + Self::UniqueDirectU32(idx) => insert_at_type(idx, cols, row_ref, |k: u32| k as usize), + Self::UniqueDirectU64(idx) => insert_at_type(idx, cols, row_ref, |k: u64| k as usize), } } @@ -631,204 +588,145 @@ impl TypedIndex { /// or remove the wrong value from the index. /// Note, however, that it will not invoke undefined behavior. /// - /// If the row was present and has been deleted, returns `Ok(Some(key_size_in_bytes))`, - /// where `key_size_in_bytes` is the size of the key. - /// [`TableIndex::delete`] will use this - /// to update the counter for [`TableIndex::num_key_bytes`]. - /// We want to store said counter outside of the [`TypedIndex`] enum, - /// but we can only compute the size using type info within the [`TypedIndex`], - /// so we have to return the size across this boundary. + /// If the row was present and has been deleted, returns `Ok(true)`. // TODO(centril): make this unsafe and use unchecked conversions. 
- fn delete(&mut self, cols: &ColList, row_ref: RowRef<'_>) -> Result, InvalidFieldError> { - fn mm_delete_at_type( - this: &mut BtreeIndex, - cols: &ColList, - row_ref: RowRef<'_>, - ) -> Result, InvalidFieldError> { - let col_pos = cols.as_singleton().unwrap(); - let key: T = row_ref.read_col(col_pos).map_err(|_| col_pos)?; - let key_size = key.key_size_in_bytes(); - Ok(this.delete(&key, &row_ref.pointer()).then_some(key_size)) - } - fn um_delete_at_type( - this: &mut BtreeUniqueIndex, - cols: &ColList, - row_ref: RowRef<'_>, - ) -> Result, InvalidFieldError> { - let col_pos = cols.as_singleton().unwrap(); - let key: T = row_ref.read_col(col_pos).map_err(|_| col_pos)?; - let key_size = key.key_size_in_bytes(); - Ok(this.delete(&key).then_some(key_size)) - } - fn direct_delete_at_type( - this: &mut UniqueDirectIndex, - cols: &ColList, - row_ref: RowRef<'_>, - to_usize: impl FnOnce(T) -> usize, - ) -> Result, InvalidFieldError> { - let col_pos = cols.as_singleton().unwrap(); - let key: T = row_ref.read_col(col_pos).map_err(|_| col_pos)?; - let key = to_usize(key); - let key_size = key.key_size_in_bytes(); - Ok(this.delete(key).then_some(key_size)) - } - fn direct_u8_delete_at_type( - this: &mut UniqueDirectFixedCapIndex, + fn delete(&mut self, cols: &ColList, row_ref: RowRef<'_>) -> Result { + fn delete_at_type( + this: &mut I, cols: &ColList, row_ref: RowRef<'_>, - to_u8: impl FnOnce(T) -> usize, - ) -> Result, InvalidFieldError> { + convert: impl FnOnce(T) -> I::Key, + ) -> Result { let col_pos = cols.as_singleton().unwrap(); - let key: T = row_ref.read_col(col_pos).map_err(|_| col_pos)?; - let key = to_u8(key); - let key_size = key.key_size_in_bytes(); - Ok(this.delete(key).then_some(key_size)) + let key = row_ref.read_col(col_pos).map_err(|_| col_pos)?; + let key = convert(key); + Ok(this.delete(&key, row_ref.pointer())) } + use core::convert::identity as id; + match self { - Self::BtreeBool(this) => mm_delete_at_type(this, cols, row_ref), - Self::BtreeU8(this) 
=> mm_delete_at_type(this, cols, row_ref), - Self::BtreeSumTag(this) => { - let col_pos = cols.as_singleton().unwrap(); - let SumTag(key) = row_ref.read_col(col_pos).map_err(|_| col_pos)?; - let key_size = key.key_size_in_bytes(); - Ok(this.delete(&key, &row_ref.pointer()).then_some(key_size)) - } - Self::BtreeI8(this) => mm_delete_at_type(this, cols, row_ref), - Self::BtreeU16(this) => mm_delete_at_type(this, cols, row_ref), - Self::BtreeI16(this) => mm_delete_at_type(this, cols, row_ref), - Self::BtreeU32(this) => mm_delete_at_type(this, cols, row_ref), - Self::BtreeI32(this) => mm_delete_at_type(this, cols, row_ref), - Self::BtreeU64(this) => mm_delete_at_type(this, cols, row_ref), - Self::BtreeI64(this) => mm_delete_at_type(this, cols, row_ref), - Self::BtreeU128(this) => mm_delete_at_type(this, cols, row_ref), - Self::BtreeI128(this) => mm_delete_at_type(this, cols, row_ref), - Self::BtreeU256(this) => mm_delete_at_type(this, cols, row_ref), - Self::BtreeI256(this) => mm_delete_at_type(this, cols, row_ref), - Self::BtreeF32(this) => mm_delete_at_type(this, cols, row_ref), - Self::BtreeF64(this) => mm_delete_at_type(this, cols, row_ref), - Self::BtreeString(this) => mm_delete_at_type(this, cols, row_ref), + Self::BtreeBool(this) => delete_at_type(this, cols, row_ref, id), + Self::BtreeU8(this) => delete_at_type(this, cols, row_ref, id), + Self::BtreeSumTag(this) => delete_at_type(this, cols, row_ref, |SumTag(k)| k), + Self::BtreeI8(this) => delete_at_type(this, cols, row_ref, id), + Self::BtreeU16(this) => delete_at_type(this, cols, row_ref, id), + Self::BtreeI16(this) => delete_at_type(this, cols, row_ref, id), + Self::BtreeU32(this) => delete_at_type(this, cols, row_ref, id), + Self::BtreeI32(this) => delete_at_type(this, cols, row_ref, id), + Self::BtreeU64(this) => delete_at_type(this, cols, row_ref, id), + Self::BtreeI64(this) => delete_at_type(this, cols, row_ref, id), + Self::BtreeU128(this) => delete_at_type(this, cols, row_ref, id), + 
Self::BtreeI128(this) => delete_at_type(this, cols, row_ref, id), + Self::BtreeU256(this) => delete_at_type(this, cols, row_ref, id), + Self::BtreeI256(this) => delete_at_type(this, cols, row_ref, id), + Self::BtreeF32(this) => delete_at_type(this, cols, row_ref, id), + Self::BtreeF64(this) => delete_at_type(this, cols, row_ref, id), + Self::BtreeString(this) => delete_at_type(this, cols, row_ref, id), Self::BtreeAV(this) => { let key = row_ref.project(cols)?; - let key_size = key.key_size_in_bytes(); - Ok(this.delete(&key, &row_ref.pointer()).then_some(key_size)) - } - Self::UniqueBtreeBool(this) => um_delete_at_type(this, cols, row_ref), - Self::UniqueBtreeU8(this) => um_delete_at_type(this, cols, row_ref), - Self::UniqueBtreeSumTag(this) => { - let col_pos = cols.as_singleton().unwrap(); - let SumTag(key) = row_ref.read_col(col_pos).map_err(|_| col_pos)?; - let key_size = key.key_size_in_bytes(); - Ok(this.delete(&key).then_some(key_size)) + Ok(this.delete(&key, row_ref.pointer())) } - Self::UniqueBtreeI8(this) => um_delete_at_type(this, cols, row_ref), - Self::UniqueBtreeU16(this) => um_delete_at_type(this, cols, row_ref), - Self::UniqueBtreeI16(this) => um_delete_at_type(this, cols, row_ref), - Self::UniqueBtreeU32(this) => um_delete_at_type(this, cols, row_ref), - Self::UniqueBtreeI32(this) => um_delete_at_type(this, cols, row_ref), - Self::UniqueBtreeU64(this) => um_delete_at_type(this, cols, row_ref), - Self::UniqueBtreeI64(this) => um_delete_at_type(this, cols, row_ref), - Self::UniqueBtreeU128(this) => um_delete_at_type(this, cols, row_ref), - Self::UniqueBtreeI128(this) => um_delete_at_type(this, cols, row_ref), - Self::UniqueBtreeU256(this) => um_delete_at_type(this, cols, row_ref), - Self::UniqueBtreeI256(this) => um_delete_at_type(this, cols, row_ref), - Self::UniqueBtreeF32(this) => um_delete_at_type(this, cols, row_ref), - Self::UniqueBtreeF64(this) => um_delete_at_type(this, cols, row_ref), - Self::UniqueBtreeString(this) => um_delete_at_type(this, 
cols, row_ref), + Self::UniqueBtreeBool(this) => delete_at_type(this, cols, row_ref, id), + Self::UniqueBtreeU8(this) => delete_at_type(this, cols, row_ref, id), + Self::UniqueBtreeSumTag(this) => delete_at_type(this, cols, row_ref, |SumTag(k)| k), + Self::UniqueBtreeI8(this) => delete_at_type(this, cols, row_ref, id), + Self::UniqueBtreeU16(this) => delete_at_type(this, cols, row_ref, id), + Self::UniqueBtreeI16(this) => delete_at_type(this, cols, row_ref, id), + Self::UniqueBtreeU32(this) => delete_at_type(this, cols, row_ref, id), + Self::UniqueBtreeI32(this) => delete_at_type(this, cols, row_ref, id), + Self::UniqueBtreeU64(this) => delete_at_type(this, cols, row_ref, id), + Self::UniqueBtreeI64(this) => delete_at_type(this, cols, row_ref, id), + Self::UniqueBtreeU128(this) => delete_at_type(this, cols, row_ref, id), + Self::UniqueBtreeI128(this) => delete_at_type(this, cols, row_ref, id), + Self::UniqueBtreeU256(this) => delete_at_type(this, cols, row_ref, id), + Self::UniqueBtreeI256(this) => delete_at_type(this, cols, row_ref, id), + Self::UniqueBtreeF32(this) => delete_at_type(this, cols, row_ref, id), + Self::UniqueBtreeF64(this) => delete_at_type(this, cols, row_ref, id), + Self::UniqueBtreeString(this) => delete_at_type(this, cols, row_ref, id), Self::UniqueBtreeAV(this) => { let key = row_ref.project(cols)?; - let key_size = key.key_size_in_bytes(); - Ok(this.delete(&key).then_some(key_size)) + Ok(this.delete(&key, row_ref.pointer())) } - Self::UniqueDirectSumTag(this) => direct_u8_delete_at_type(this, cols, row_ref, |SumTag(k)| k as usize), - Self::UniqueDirectU8(this) => direct_delete_at_type(this, cols, row_ref, |k: u8| k as usize), - Self::UniqueDirectU16(this) => direct_delete_at_type(this, cols, row_ref, |k: u16| k as usize), - Self::UniqueDirectU32(this) => direct_delete_at_type(this, cols, row_ref, |k: u32| k as usize), - Self::UniqueDirectU64(this) => direct_delete_at_type(this, cols, row_ref, |k: u64| k as usize), + 
Self::UniqueDirectSumTag(this) => delete_at_type(this, cols, row_ref, |SumTag(k)| k), + Self::UniqueDirectU8(this) => delete_at_type(this, cols, row_ref, |k: u8| k as usize), + Self::UniqueDirectU16(this) => delete_at_type(this, cols, row_ref, |k: u16| k as usize), + Self::UniqueDirectU32(this) => delete_at_type(this, cols, row_ref, |k: u32| k as usize), + Self::UniqueDirectU64(this) => delete_at_type(this, cols, row_ref, |k: u64| k as usize), } } fn seek_point(&self, key: &AlgebraicValue) -> TypedIndexPointIter<'_> { - fn mm_iter_at_type<'a, T: Ord>( - this: &'a BtreeIndex, - key: &AlgebraicValue, - av_as_t: impl Fn(&AlgebraicValue) -> Option<&T>, - ) -> BtreeIndexPointIter<'a> { - this.values_in_point(av_as_t(key).expect("key does not conform to key type of index")) - } - fn um_iter_at_type<'a, T: Ord>( - this: &'a BtreeUniqueIndex, + fn iter_at_type<'a, I: Index>( + this: &'a I, key: &AlgebraicValue, - av_as_t: impl Fn(&AlgebraicValue) -> Option<&T>, - ) -> BtreeUniqueIndexPointIter<'a> { - this.values_in_point(av_as_t(key).expect("key does not conform to key type of index")) + av_as_t: impl Fn(&AlgebraicValue) -> Option<&I::Key>, + ) -> I::PointIter<'a> { + this.seek_point(av_as_t(key).expect("key does not conform to key type of index")) } - fn direct_iter_at_type( - this: &UniqueDirectIndex, + fn convert_iter_at_type<'a, T, I: Index>( + this: &'a I, key: &AlgebraicValue, av_as_t: impl Fn(&AlgebraicValue) -> Option<&T>, - to_usize: impl Copy + FnOnce(&T) -> usize, - ) -> UniqueDirectIndexPointIter { + convert: impl Copy + FnOnce(&T) -> I::Key, + ) -> I::PointIter<'a> { let av_as_t = |v| av_as_t(v).expect("key does not conform to key type of index"); - this.seek_point(to_usize(av_as_t(key))) + this.seek_point(&convert(av_as_t(key))) } use TypedIndex::*; use TypedIndexPointIter::*; match self { - BtreeBool(this) => BTree(mm_iter_at_type(this, key, AlgebraicValue::as_bool)), - BtreeU8(this) => BTree(mm_iter_at_type(this, key, AlgebraicValue::as_u8)), - 
BtreeSumTag(this) => BTree(mm_iter_at_type(this, key, as_tag)), - BtreeI8(this) => BTree(mm_iter_at_type(this, key, AlgebraicValue::as_i8)), - BtreeU16(this) => BTree(mm_iter_at_type(this, key, AlgebraicValue::as_u16)), - BtreeI16(this) => BTree(mm_iter_at_type(this, key, AlgebraicValue::as_i16)), - BtreeU32(this) => BTree(mm_iter_at_type(this, key, AlgebraicValue::as_u32)), - BtreeI32(this) => BTree(mm_iter_at_type(this, key, AlgebraicValue::as_i32)), - BtreeU64(this) => BTree(mm_iter_at_type(this, key, AlgebraicValue::as_u64)), - BtreeI64(this) => BTree(mm_iter_at_type(this, key, AlgebraicValue::as_i64)), - BtreeU128(this) => BTree(mm_iter_at_type(this, key, AlgebraicValue::as_u128)), - BtreeI128(this) => BTree(mm_iter_at_type(this, key, AlgebraicValue::as_i128)), - BtreeU256(this) => BTree(mm_iter_at_type(this, key, |av| av.as_u256().map(|x| &**x))), - BtreeI256(this) => BTree(mm_iter_at_type(this, key, |av| av.as_i256().map(|x| &**x))), - BtreeF32(this) => BTree(mm_iter_at_type(this, key, AlgebraicValue::as_f32)), - BtreeF64(this) => BTree(mm_iter_at_type(this, key, AlgebraicValue::as_f64)), - BtreeString(this) => BTree(mm_iter_at_type(this, key, AlgebraicValue::as_string)), - BtreeAV(this) => BTree(this.values_in_point(key)), - - UniqueBtreeBool(this) => UniqueBTree(um_iter_at_type(this, key, AlgebraicValue::as_bool)), - UniqueBtreeU8(this) => UniqueBTree(um_iter_at_type(this, key, AlgebraicValue::as_u8)), - UniqueBtreeSumTag(this) => UniqueBTree(um_iter_at_type(this, key, as_tag)), - UniqueBtreeI8(this) => UniqueBTree(um_iter_at_type(this, key, AlgebraicValue::as_i8)), - UniqueBtreeU16(this) => UniqueBTree(um_iter_at_type(this, key, AlgebraicValue::as_u16)), - UniqueBtreeI16(this) => UniqueBTree(um_iter_at_type(this, key, AlgebraicValue::as_i16)), - UniqueBtreeU32(this) => UniqueBTree(um_iter_at_type(this, key, AlgebraicValue::as_u32)), - UniqueBtreeI32(this) => UniqueBTree(um_iter_at_type(this, key, AlgebraicValue::as_i32)), - UniqueBtreeU64(this) => 
UniqueBTree(um_iter_at_type(this, key, AlgebraicValue::as_u64)), - UniqueBtreeI64(this) => UniqueBTree(um_iter_at_type(this, key, AlgebraicValue::as_i64)), - UniqueBtreeU128(this) => UniqueBTree(um_iter_at_type(this, key, AlgebraicValue::as_u128)), - UniqueBtreeI128(this) => UniqueBTree(um_iter_at_type(this, key, AlgebraicValue::as_i128)), - UniqueBtreeU256(this) => UniqueBTree(um_iter_at_type(this, key, |av| av.as_u256().map(|x| &**x))), - UniqueBtreeI256(this) => UniqueBTree(um_iter_at_type(this, key, |av| av.as_i256().map(|x| &**x))), - UniqueBtreeF32(this) => UniqueBTree(um_iter_at_type(this, key, AlgebraicValue::as_f32)), - UniqueBtreeF64(this) => UniqueBTree(um_iter_at_type(this, key, AlgebraicValue::as_f64)), - UniqueBtreeString(this) => UniqueBTree(um_iter_at_type(this, key, AlgebraicValue::as_string)), - UniqueBtreeAV(this) => UniqueBTree(this.values_in_point(key)), - - UniqueDirectSumTag(this) => { - let key = as_tag(key).expect("key does not conform to key type of index"); - UniqueDirect(this.seek_point(*key as usize)) - } + BtreeBool(this) => BTree(iter_at_type(this, key, AlgebraicValue::as_bool)), + BtreeU8(this) => BTree(iter_at_type(this, key, AlgebraicValue::as_u8)), + BtreeSumTag(this) => BTree(iter_at_type(this, key, as_tag)), + BtreeI8(this) => BTree(iter_at_type(this, key, AlgebraicValue::as_i8)), + BtreeU16(this) => BTree(iter_at_type(this, key, AlgebraicValue::as_u16)), + BtreeI16(this) => BTree(iter_at_type(this, key, AlgebraicValue::as_i16)), + BtreeU32(this) => BTree(iter_at_type(this, key, AlgebraicValue::as_u32)), + BtreeI32(this) => BTree(iter_at_type(this, key, AlgebraicValue::as_i32)), + BtreeU64(this) => BTree(iter_at_type(this, key, AlgebraicValue::as_u64)), + BtreeI64(this) => BTree(iter_at_type(this, key, AlgebraicValue::as_i64)), + BtreeU128(this) => BTree(iter_at_type(this, key, AlgebraicValue::as_u128)), + BtreeI128(this) => BTree(iter_at_type(this, key, AlgebraicValue::as_i128)), + BtreeU256(this) => BTree(iter_at_type(this, 
key, |av| av.as_u256().map(|x| &**x))), + BtreeI256(this) => BTree(iter_at_type(this, key, |av| av.as_i256().map(|x| &**x))), + BtreeF32(this) => BTree(iter_at_type(this, key, AlgebraicValue::as_f32)), + BtreeF64(this) => BTree(iter_at_type(this, key, AlgebraicValue::as_f64)), + BtreeString(this) => BTree(iter_at_type(this, key, AlgebraicValue::as_string)), + BtreeAV(this) => BTree(this.seek_point(key)), + + UniqueBtreeBool(this) => UniqueBTree(iter_at_type(this, key, AlgebraicValue::as_bool)), + UniqueBtreeU8(this) => UniqueBTree(iter_at_type(this, key, AlgebraicValue::as_u8)), + UniqueBtreeSumTag(this) => UniqueBTree(iter_at_type(this, key, as_tag)), + UniqueBtreeI8(this) => UniqueBTree(iter_at_type(this, key, AlgebraicValue::as_i8)), + UniqueBtreeU16(this) => UniqueBTree(iter_at_type(this, key, AlgebraicValue::as_u16)), + UniqueBtreeI16(this) => UniqueBTree(iter_at_type(this, key, AlgebraicValue::as_i16)), + UniqueBtreeU32(this) => UniqueBTree(iter_at_type(this, key, AlgebraicValue::as_u32)), + UniqueBtreeI32(this) => UniqueBTree(iter_at_type(this, key, AlgebraicValue::as_i32)), + UniqueBtreeU64(this) => UniqueBTree(iter_at_type(this, key, AlgebraicValue::as_u64)), + UniqueBtreeI64(this) => UniqueBTree(iter_at_type(this, key, AlgebraicValue::as_i64)), + UniqueBtreeU128(this) => UniqueBTree(iter_at_type(this, key, AlgebraicValue::as_u128)), + UniqueBtreeI128(this) => UniqueBTree(iter_at_type(this, key, AlgebraicValue::as_i128)), + UniqueBtreeU256(this) => UniqueBTree(iter_at_type(this, key, |av| av.as_u256().map(|x| &**x))), + UniqueBtreeI256(this) => UniqueBTree(iter_at_type(this, key, |av| av.as_i256().map(|x| &**x))), + UniqueBtreeF32(this) => UniqueBTree(iter_at_type(this, key, AlgebraicValue::as_f32)), + UniqueBtreeF64(this) => UniqueBTree(iter_at_type(this, key, AlgebraicValue::as_f64)), + UniqueBtreeString(this) => UniqueBTree(iter_at_type(this, key, AlgebraicValue::as_string)), + UniqueBtreeAV(this) => UniqueBTree(this.seek_point(key)), + + 
UniqueDirectSumTag(this) => UniqueDirect(iter_at_type(this, key, as_tag)), UniqueDirectU8(this) => { - UniqueDirect(direct_iter_at_type(this, key, AlgebraicValue::as_u8, |k| *k as usize)) + UniqueDirect(convert_iter_at_type(this, key, AlgebraicValue::as_u8, |k| *k as usize)) } UniqueDirectU16(this) => { - UniqueDirect(direct_iter_at_type(this, key, AlgebraicValue::as_u16, |k| *k as usize)) + UniqueDirect(convert_iter_at_type(this, key, AlgebraicValue::as_u16, |k| *k as usize)) } UniqueDirectU32(this) => { - UniqueDirect(direct_iter_at_type(this, key, AlgebraicValue::as_u32, |k| *k as usize)) + UniqueDirect(convert_iter_at_type(this, key, AlgebraicValue::as_u32, |k| *k as usize)) } UniqueDirectU64(this) => { - UniqueDirect(direct_iter_at_type(this, key, AlgebraicValue::as_u64, |k| *k as usize)) + UniqueDirect(convert_iter_at_type(this, key, AlgebraicValue::as_u64, |k| *k as usize)) } } } @@ -852,104 +750,84 @@ impl TypedIndex { return RangeEmpty; } - fn mm_iter_at_type<'a, T: Ord>( - this: &'a BtreeIndex, + fn iter_at_type<'a, I: RangedIndex>( + this: &'a I, range: &impl RangeBounds, - av_as_t: impl Fn(&AlgebraicValue) -> Option<&T>, - ) -> BtreeIndexRangeIter<'a, T> { + av_as_t: impl Fn(&AlgebraicValue) -> Option<&I::Key>, + ) -> I::RangeIter<'a> { let av_as_t = |v| av_as_t(v).expect("bound does not conform to key type of index"); let start = range.start_bound().map(av_as_t); let end = range.end_bound().map(av_as_t); - this.values_in_range(&(start, end)) - } - fn um_iter_at_type<'a, T: Ord>( - this: &'a BtreeUniqueIndex, - range: &impl RangeBounds, - av_as_t: impl Fn(&AlgebraicValue) -> Option<&T>, - ) -> BtreeUniqueIndexRangeIter<'a, T> { - let av_as_t = |v| av_as_t(v).expect("bound does not conform to key type of index"); - let start = range.start_bound().map(av_as_t); - let end = range.end_bound().map(av_as_t); - this.values_in_range(&(start, end)) + this.seek_range(&(start, end)) } - fn direct_iter_at_type<'a, T>( - this: &'a UniqueDirectIndex, + fn 
convert_iter_at_type<'a, T, I: RangedIndex>( + this: &'a I, range: &impl RangeBounds, av_as_t: impl Fn(&AlgebraicValue) -> Option<&T>, - to_usize: impl Copy + FnOnce(&T) -> usize, - ) -> UniqueDirectIndexRangeIter<'a> { + convert: impl Copy + FnOnce(&T) -> I::Key, + ) -> I::RangeIter<'a> { let av_as_t = |v| av_as_t(v).expect("bound does not conform to key type of index"); - let start = range.start_bound().map(av_as_t).map(to_usize); - let end = range.end_bound().map(av_as_t).map(to_usize); + let start = range.start_bound().map(av_as_t).map(convert); + let end = range.end_bound().map(av_as_t).map(convert); this.seek_range(&(start, end)) } use TypedIndexRangeIter::*; match self { - Self::BtreeBool(this) => BtreeBool(mm_iter_at_type(this, range, AlgebraicValue::as_bool)), - Self::BtreeU8(this) => BtreeU8(mm_iter_at_type(this, range, AlgebraicValue::as_u8)), - Self::BtreeSumTag(this) => BtreeU8(mm_iter_at_type(this, range, as_tag)), - Self::BtreeI8(this) => BtreeI8(mm_iter_at_type(this, range, AlgebraicValue::as_i8)), - Self::BtreeU16(this) => BtreeU16(mm_iter_at_type(this, range, AlgebraicValue::as_u16)), - Self::BtreeI16(this) => BtreeI16(mm_iter_at_type(this, range, AlgebraicValue::as_i16)), - Self::BtreeU32(this) => BtreeU32(mm_iter_at_type(this, range, AlgebraicValue::as_u32)), - Self::BtreeI32(this) => BtreeI32(mm_iter_at_type(this, range, AlgebraicValue::as_i32)), - Self::BtreeU64(this) => BtreeU64(mm_iter_at_type(this, range, AlgebraicValue::as_u64)), - Self::BtreeI64(this) => BtreeI64(mm_iter_at_type(this, range, AlgebraicValue::as_i64)), - Self::BtreeU128(this) => BtreeU128(mm_iter_at_type(this, range, AlgebraicValue::as_u128)), - Self::BtreeI128(this) => BtreeI128(mm_iter_at_type(this, range, AlgebraicValue::as_i128)), - Self::BtreeU256(this) => BtreeU256(mm_iter_at_type(this, range, |av| av.as_u256().map(|x| &**x))), - Self::BtreeI256(this) => BtreeI256(mm_iter_at_type(this, range, |av| av.as_i256().map(|x| &**x))), - Self::BtreeF32(this) => 
BtreeF32(mm_iter_at_type(this, range, AlgebraicValue::as_f32)), - Self::BtreeF64(this) => BtreeF64(mm_iter_at_type(this, range, AlgebraicValue::as_f64)), - Self::BtreeString(this) => BtreeString(mm_iter_at_type(this, range, AlgebraicValue::as_string)), - Self::BtreeAV(this) => BtreeAV(this.values_in_range(range)), - - Self::UniqueBtreeBool(this) => UniqueBtreeBool(um_iter_at_type(this, range, AlgebraicValue::as_bool)), - Self::UniqueBtreeU8(this) => UniqueBtreeU8(um_iter_at_type(this, range, AlgebraicValue::as_u8)), - Self::UniqueBtreeSumTag(this) => UniqueBtreeU8(um_iter_at_type(this, range, as_tag)), - Self::UniqueBtreeI8(this) => UniqueBtreeI8(um_iter_at_type(this, range, AlgebraicValue::as_i8)), - Self::UniqueBtreeU16(this) => UniqueBtreeU16(um_iter_at_type(this, range, AlgebraicValue::as_u16)), - Self::UniqueBtreeI16(this) => UniqueBtreeI16(um_iter_at_type(this, range, AlgebraicValue::as_i16)), - Self::UniqueBtreeU32(this) => UniqueBtreeU32(um_iter_at_type(this, range, AlgebraicValue::as_u32)), - Self::UniqueBtreeI32(this) => UniqueBtreeI32(um_iter_at_type(this, range, AlgebraicValue::as_i32)), - Self::UniqueBtreeU64(this) => UniqueBtreeU64(um_iter_at_type(this, range, AlgebraicValue::as_u64)), - Self::UniqueBtreeI64(this) => UniqueBtreeI64(um_iter_at_type(this, range, AlgebraicValue::as_i64)), - Self::UniqueBtreeU128(this) => UniqueBtreeU128(um_iter_at_type(this, range, AlgebraicValue::as_u128)), - Self::UniqueBtreeI128(this) => UniqueBtreeI128(um_iter_at_type(this, range, AlgebraicValue::as_i128)), - Self::UniqueBtreeF32(this) => UniqueBtreeF32(um_iter_at_type(this, range, AlgebraicValue::as_f32)), - Self::UniqueBtreeF64(this) => UniqueBtreeF64(um_iter_at_type(this, range, AlgebraicValue::as_f64)), - Self::UniqueBtreeU256(this) => { - UniqueBtreeU256(um_iter_at_type(this, range, |av| av.as_u256().map(|x| &**x))) - } - Self::UniqueBtreeI256(this) => { - UniqueBtreeI256(um_iter_at_type(this, range, |av| av.as_i256().map(|x| &**x))) - } - 
Self::UniqueBtreeString(this) => UniqueBtreeString(um_iter_at_type(this, range, AlgebraicValue::as_string)), - Self::UniqueBtreeAV(this) => UniqueBtreeAV(this.values_in_range(range)), - - Self::UniqueDirectSumTag(this) => { - let av_as_t = |v| as_tag(v).copied().expect("bound does not conform to key type of index") as usize; - let start = range.start_bound().map(av_as_t); - let end = range.end_bound().map(av_as_t); - let iter = this.seek_range(&(start, end)); - UniqueDirectU8(iter) - } - Self::UniqueDirectU8(this) => { - UniqueDirect(direct_iter_at_type(this, range, AlgebraicValue::as_u8, |k| *k as usize)) - } + Self::BtreeBool(this) => BtreeBool(iter_at_type(this, range, AlgebraicValue::as_bool)), + Self::BtreeU8(this) => BtreeU8(iter_at_type(this, range, AlgebraicValue::as_u8)), + Self::BtreeSumTag(this) => BtreeU8(iter_at_type(this, range, as_tag)), + Self::BtreeI8(this) => BtreeI8(iter_at_type(this, range, AlgebraicValue::as_i8)), + Self::BtreeU16(this) => BtreeU16(iter_at_type(this, range, AlgebraicValue::as_u16)), + Self::BtreeI16(this) => BtreeI16(iter_at_type(this, range, AlgebraicValue::as_i16)), + Self::BtreeU32(this) => BtreeU32(iter_at_type(this, range, AlgebraicValue::as_u32)), + Self::BtreeI32(this) => BtreeI32(iter_at_type(this, range, AlgebraicValue::as_i32)), + Self::BtreeU64(this) => BtreeU64(iter_at_type(this, range, AlgebraicValue::as_u64)), + Self::BtreeI64(this) => BtreeI64(iter_at_type(this, range, AlgebraicValue::as_i64)), + Self::BtreeU128(this) => BtreeU128(iter_at_type(this, range, AlgebraicValue::as_u128)), + Self::BtreeI128(this) => BtreeI128(iter_at_type(this, range, AlgebraicValue::as_i128)), + Self::BtreeU256(this) => BtreeU256(iter_at_type(this, range, |av| av.as_u256().map(|x| &**x))), + Self::BtreeI256(this) => BtreeI256(iter_at_type(this, range, |av| av.as_i256().map(|x| &**x))), + Self::BtreeF32(this) => BtreeF32(iter_at_type(this, range, AlgebraicValue::as_f32)), + Self::BtreeF64(this) => BtreeF64(iter_at_type(this, range, 
AlgebraicValue::as_f64)), + Self::BtreeString(this) => BtreeString(iter_at_type(this, range, AlgebraicValue::as_string)), + Self::BtreeAV(this) => BtreeAV(this.seek_range(range)), + + Self::UniqueBtreeBool(this) => UniqueBtreeBool(iter_at_type(this, range, AlgebraicValue::as_bool)), + Self::UniqueBtreeU8(this) => UniqueBtreeU8(iter_at_type(this, range, AlgebraicValue::as_u8)), + Self::UniqueBtreeSumTag(this) => UniqueBtreeU8(iter_at_type(this, range, as_tag)), + Self::UniqueBtreeI8(this) => UniqueBtreeI8(iter_at_type(this, range, AlgebraicValue::as_i8)), + Self::UniqueBtreeU16(this) => UniqueBtreeU16(iter_at_type(this, range, AlgebraicValue::as_u16)), + Self::UniqueBtreeI16(this) => UniqueBtreeI16(iter_at_type(this, range, AlgebraicValue::as_i16)), + Self::UniqueBtreeU32(this) => UniqueBtreeU32(iter_at_type(this, range, AlgebraicValue::as_u32)), + Self::UniqueBtreeI32(this) => UniqueBtreeI32(iter_at_type(this, range, AlgebraicValue::as_i32)), + Self::UniqueBtreeU64(this) => UniqueBtreeU64(iter_at_type(this, range, AlgebraicValue::as_u64)), + Self::UniqueBtreeI64(this) => UniqueBtreeI64(iter_at_type(this, range, AlgebraicValue::as_i64)), + Self::UniqueBtreeU128(this) => UniqueBtreeU128(iter_at_type(this, range, AlgebraicValue::as_u128)), + Self::UniqueBtreeI128(this) => UniqueBtreeI128(iter_at_type(this, range, AlgebraicValue::as_i128)), + Self::UniqueBtreeF32(this) => UniqueBtreeF32(iter_at_type(this, range, AlgebraicValue::as_f32)), + Self::UniqueBtreeF64(this) => UniqueBtreeF64(iter_at_type(this, range, AlgebraicValue::as_f64)), + Self::UniqueBtreeU256(this) => UniqueBtreeU256(iter_at_type(this, range, |av| av.as_u256().map(|x| &**x))), + Self::UniqueBtreeI256(this) => UniqueBtreeI256(iter_at_type(this, range, |av| av.as_i256().map(|x| &**x))), + Self::UniqueBtreeString(this) => UniqueBtreeString(iter_at_type(this, range, AlgebraicValue::as_string)), + Self::UniqueBtreeAV(this) => UniqueBtreeAV(this.seek_range(range)), + + Self::UniqueDirectSumTag(this) => 
UniqueDirectU8(iter_at_type(this, range, as_tag)), + Self::UniqueDirectU8(this) => UniqueDirect(convert_iter_at_type(this, range, AlgebraicValue::as_u8, |k| { + *k as usize + })), Self::UniqueDirectU16(this) => { - UniqueDirect(direct_iter_at_type(this, range, AlgebraicValue::as_u16, |k| { + UniqueDirect(convert_iter_at_type(this, range, AlgebraicValue::as_u16, |k| { *k as usize })) } Self::UniqueDirectU32(this) => { - UniqueDirect(direct_iter_at_type(this, range, AlgebraicValue::as_u32, |k| { + UniqueDirect(convert_iter_at_type(this, range, AlgebraicValue::as_u32, |k| { *k as usize })) } Self::UniqueDirectU64(this) => { - UniqueDirect(direct_iter_at_type(this, range, AlgebraicValue::as_u64, |k| { + UniqueDirect(convert_iter_at_type(this, range, AlgebraicValue::as_u64, |k| { *k as usize })) } @@ -1004,53 +882,57 @@ impl TypedIndex { #[allow(unused)] // used only by tests fn is_empty(&self) -> bool { - self.len() == 0 + self.num_rows() == 0 } - #[allow(unused)] // used only by tests - fn len(&self) -> usize { + /// The number of rows stored in this index. + /// + /// Note that, for non-unique indexes, this may be larger than [`Self::num_keys`]. + /// + /// This method runs in constant time. 
+ fn num_rows(&self) -> usize { match self { - Self::BtreeBool(this) => this.len(), - Self::BtreeU8(this) | Self::BtreeSumTag(this) => this.len(), - Self::BtreeI8(this) => this.len(), - Self::BtreeU16(this) => this.len(), - Self::BtreeI16(this) => this.len(), - Self::BtreeU32(this) => this.len(), - Self::BtreeI32(this) => this.len(), - Self::BtreeU64(this) => this.len(), - Self::BtreeI64(this) => this.len(), - Self::BtreeU128(this) => this.len(), - Self::BtreeI128(this) => this.len(), - Self::BtreeU256(this) => this.len(), - Self::BtreeI256(this) => this.len(), - Self::BtreeF32(this) => this.len(), - Self::BtreeF64(this) => this.len(), - Self::BtreeString(this) => this.len(), - Self::BtreeAV(this) => this.len(), - - Self::UniqueBtreeBool(this) => this.len(), - Self::UniqueBtreeU8(this) | Self::UniqueBtreeSumTag(this) => this.len(), - Self::UniqueBtreeI8(this) => this.len(), - Self::UniqueBtreeU16(this) => this.len(), - Self::UniqueBtreeI16(this) => this.len(), - Self::UniqueBtreeU32(this) => this.len(), - Self::UniqueBtreeI32(this) => this.len(), - Self::UniqueBtreeU64(this) => this.len(), - Self::UniqueBtreeI64(this) => this.len(), - Self::UniqueBtreeU128(this) => this.len(), - Self::UniqueBtreeI128(this) => this.len(), - Self::UniqueBtreeU256(this) => this.len(), - Self::UniqueBtreeI256(this) => this.len(), - Self::UniqueBtreeF32(this) => this.len(), - Self::UniqueBtreeF64(this) => this.len(), - Self::UniqueBtreeString(this) => this.len(), - Self::UniqueBtreeAV(this) => this.len(), - - Self::UniqueDirectSumTag(this) => this.len(), + Self::BtreeBool(this) => this.num_rows(), + Self::BtreeU8(this) | Self::BtreeSumTag(this) => this.num_rows(), + Self::BtreeI8(this) => this.num_rows(), + Self::BtreeU16(this) => this.num_rows(), + Self::BtreeI16(this) => this.num_rows(), + Self::BtreeU32(this) => this.num_rows(), + Self::BtreeI32(this) => this.num_rows(), + Self::BtreeU64(this) => this.num_rows(), + Self::BtreeI64(this) => this.num_rows(), + Self::BtreeU128(this) => 
this.num_rows(), + Self::BtreeI128(this) => this.num_rows(), + Self::BtreeU256(this) => this.num_rows(), + Self::BtreeI256(this) => this.num_rows(), + Self::BtreeF32(this) => this.num_rows(), + Self::BtreeF64(this) => this.num_rows(), + Self::BtreeString(this) => this.num_rows(), + Self::BtreeAV(this) => this.num_rows(), + + Self::UniqueBtreeBool(this) => this.num_rows(), + Self::UniqueBtreeU8(this) | Self::UniqueBtreeSumTag(this) => this.num_rows(), + Self::UniqueBtreeI8(this) => this.num_rows(), + Self::UniqueBtreeU16(this) => this.num_rows(), + Self::UniqueBtreeI16(this) => this.num_rows(), + Self::UniqueBtreeU32(this) => this.num_rows(), + Self::UniqueBtreeI32(this) => this.num_rows(), + Self::UniqueBtreeU64(this) => this.num_rows(), + Self::UniqueBtreeI64(this) => this.num_rows(), + Self::UniqueBtreeU128(this) => this.num_rows(), + Self::UniqueBtreeI128(this) => this.num_rows(), + Self::UniqueBtreeU256(this) => this.num_rows(), + Self::UniqueBtreeI256(this) => this.num_rows(), + Self::UniqueBtreeF32(this) => this.num_rows(), + Self::UniqueBtreeF64(this) => this.num_rows(), + Self::UniqueBtreeString(this) => this.num_rows(), + Self::UniqueBtreeAV(this) => this.num_rows(), + + Self::UniqueDirectSumTag(this) => this.num_rows(), Self::UniqueDirectU8(this) | Self::UniqueDirectU16(this) | Self::UniqueDirectU32(this) - | Self::UniqueDirectU64(this) => this.len(), + | Self::UniqueDirectU64(this) => this.num_rows(), } } @@ -1099,6 +981,60 @@ impl TypedIndex { | Self::UniqueDirectU64(this) => this.num_keys(), } } + + /// The number of bytes stored in keys in this index. + /// + /// For non-unique indexes, duplicate keys are counted once for each row that refers to them, + /// even though the internal storage may deduplicate them as an optimization. + /// + /// This method runs in constant time. + /// + /// See the [`KeySize`] trait for more details on how this method computes its result. 
+ pub fn num_key_bytes(&self) -> u64 { + match self { + Self::BtreeBool(this) => this.num_key_bytes(), + Self::BtreeU8(this) | Self::BtreeSumTag(this) => this.num_key_bytes(), + Self::BtreeI8(this) => this.num_key_bytes(), + Self::BtreeU16(this) => this.num_key_bytes(), + Self::BtreeI16(this) => this.num_key_bytes(), + Self::BtreeU32(this) => this.num_key_bytes(), + Self::BtreeI32(this) => this.num_key_bytes(), + Self::BtreeU64(this) => this.num_key_bytes(), + Self::BtreeI64(this) => this.num_key_bytes(), + Self::BtreeU128(this) => this.num_key_bytes(), + Self::BtreeI128(this) => this.num_key_bytes(), + Self::BtreeU256(this) => this.num_key_bytes(), + Self::BtreeI256(this) => this.num_key_bytes(), + Self::BtreeF32(this) => this.num_key_bytes(), + Self::BtreeF64(this) => this.num_key_bytes(), + Self::BtreeString(this) => this.num_key_bytes(), + Self::BtreeAV(this) => this.num_key_bytes(), + + Self::UniqueBtreeBool(this) => this.num_key_bytes(), + Self::UniqueBtreeU8(this) | Self::UniqueBtreeSumTag(this) => this.num_key_bytes(), + Self::UniqueBtreeI8(this) => this.num_key_bytes(), + Self::UniqueBtreeU16(this) => this.num_key_bytes(), + Self::UniqueBtreeI16(this) => this.num_key_bytes(), + Self::UniqueBtreeU32(this) => this.num_key_bytes(), + Self::UniqueBtreeI32(this) => this.num_key_bytes(), + Self::UniqueBtreeU64(this) => this.num_key_bytes(), + Self::UniqueBtreeI64(this) => this.num_key_bytes(), + Self::UniqueBtreeU128(this) => this.num_key_bytes(), + Self::UniqueBtreeI128(this) => this.num_key_bytes(), + Self::UniqueBtreeU256(this) => this.num_key_bytes(), + Self::UniqueBtreeI256(this) => this.num_key_bytes(), + Self::UniqueBtreeF32(this) => this.num_key_bytes(), + Self::UniqueBtreeF64(this) => this.num_key_bytes(), + Self::UniqueBtreeString(this) => this.num_key_bytes(), + Self::UniqueBtreeAV(this) => this.num_key_bytes(), + + Self::UniqueDirectSumTag(this) => this.num_key_bytes(), + Self::UniqueDirectU8(this) + | Self::UniqueDirectU16(this) + | 
Self::UniqueDirectU32(this) + | Self::UniqueDirectU64(this) => this.num_key_bytes(), + } + } } /// An index on a set of [`ColId`]s of a table. @@ -1111,17 +1047,6 @@ pub struct TableIndex { // NOTE(centril): This is accessed in index scan ABIs for decoding, so don't `Box<_>` it. pub key_type: AlgebraicType, - /// The number of rows in this index. - /// - /// Memoized counter for [`Self::num_rows`]. - num_rows: u64, - - /// The number of key bytes in this index. - /// - /// Memoized counter for [`Self::num_key_bytes`]. - /// See that method for more detailed documentation. - num_key_bytes: u64, - /// Given a full row, typed at some `ty: ProductType`, /// these columns are the ones that this index indexes. /// Projecting the `ty` to `self.indexed_columns` yields the index's type `self.key_type`. @@ -1133,19 +1058,13 @@ impl MemoryUsage for TableIndex { let Self { idx, key_type, - num_rows, - num_key_bytes, indexed_columns, } = self; - idx.heap_usage() - + key_type.heap_usage() - + num_rows.heap_usage() - + num_key_bytes.heap_usage() - + indexed_columns.heap_usage() + idx.heap_usage() + key_type.heap_usage() + indexed_columns.heap_usage() } } -static_assert_size!(TableIndex, 88); +static_assert_size!(TableIndex, 80); impl TableIndex { /// Returns a new possibly unique index, with `index_id` for a choice of indexing algorithm. @@ -1160,8 +1079,6 @@ impl TableIndex { Ok(Self { idx: typed_index, key_type, - num_rows: 0, - num_key_bytes: 0, indexed_columns, }) } @@ -1175,8 +1092,6 @@ impl TableIndex { Self { idx, key_type, - num_rows: 0, - num_key_bytes: 0, indexed_columns, } } @@ -1202,33 +1117,14 @@ impl TableIndex { // SAFETY: // 1. We're passing the same `ColList` that was provided during construction. // 2. Forward the caller's proof obligation. - let res = unsafe { self.idx.insert(&self.indexed_columns, row_ref) }; - match res { - Ok(key_size) => { - // No existing row; the new row was inserted. 
- // Update the `num_rows` and `num_key_bytes` counters - // to account for the new insertion. - self.num_rows += 1; - self.num_key_bytes += key_size as u64; - Ok(()) - } - Err(e) => Err(e), - } + unsafe { self.idx.insert(&self.indexed_columns, row_ref) } } /// Deletes `row_ref` with its indexed value `row_ref.project(&self.indexed_columns)` from this index. /// /// Returns whether `ptr` was present. pub fn delete(&mut self, row_ref: RowRef<'_>) -> Result { - if let Some(size_in_bytes) = self.idx.delete(&self.indexed_columns, row_ref)? { - // Was present, and deleted: update the `num_rows` and `num_key_bytes` counters. - self.num_rows -= 1; - self.num_key_bytes -= size_in_bytes as u64; - Ok(true) - } else { - // Was not present: don't update counters. - Ok(false) - } + self.idx.delete(&self.indexed_columns, row_ref) } /// Returns whether `value` is in this index. @@ -1308,24 +1204,24 @@ impl TableIndex { | (BtreeString(_), BtreeString(_)) | (BtreeAV(_), BtreeAV(_)) => Ok(()), // For unique indices, we'll need to see if everything in `other` can be added to `idx`. 
- (UniqueBtreeBool(idx), UniqueBtreeBool(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeU8(idx), UniqueBtreeU8(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeSumTag(idx), UniqueBtreeSumTag(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeI8(idx), UniqueBtreeI8(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeU16(idx), UniqueBtreeU16(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeI16(idx), UniqueBtreeI16(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeU32(idx), UniqueBtreeU32(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeI32(idx), UniqueBtreeI32(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeU64(idx), UniqueBtreeU64(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeI64(idx), UniqueBtreeI64(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeU128(idx), UniqueBtreeU128(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeI128(idx), UniqueBtreeI128(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeU256(idx), UniqueBtreeU256(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeI256(idx), UniqueBtreeI256(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeF32(idx), UniqueBtreeF32(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeF64(idx), UniqueBtreeF64(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeString(idx), UniqueBtreeString(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), - (UniqueBtreeAV(idx), UniqueBtreeAV(other)) => idx.can_merge(other, ignore).map_err(|ptr| *ptr), + (UniqueBtreeBool(idx), UniqueBtreeBool(other)) => idx.can_merge(other, ignore), + (UniqueBtreeU8(idx), UniqueBtreeU8(other)) => idx.can_merge(other, ignore), + 
(UniqueBtreeSumTag(idx), UniqueBtreeSumTag(other)) => idx.can_merge(other, ignore), + (UniqueBtreeI8(idx), UniqueBtreeI8(other)) => idx.can_merge(other, ignore), + (UniqueBtreeU16(idx), UniqueBtreeU16(other)) => idx.can_merge(other, ignore), + (UniqueBtreeI16(idx), UniqueBtreeI16(other)) => idx.can_merge(other, ignore), + (UniqueBtreeU32(idx), UniqueBtreeU32(other)) => idx.can_merge(other, ignore), + (UniqueBtreeI32(idx), UniqueBtreeI32(other)) => idx.can_merge(other, ignore), + (UniqueBtreeU64(idx), UniqueBtreeU64(other)) => idx.can_merge(other, ignore), + (UniqueBtreeI64(idx), UniqueBtreeI64(other)) => idx.can_merge(other, ignore), + (UniqueBtreeU128(idx), UniqueBtreeU128(other)) => idx.can_merge(other, ignore), + (UniqueBtreeI128(idx), UniqueBtreeI128(other)) => idx.can_merge(other, ignore), + (UniqueBtreeU256(idx), UniqueBtreeU256(other)) => idx.can_merge(other, ignore), + (UniqueBtreeI256(idx), UniqueBtreeI256(other)) => idx.can_merge(other, ignore), + (UniqueBtreeF32(idx), UniqueBtreeF32(other)) => idx.can_merge(other, ignore), + (UniqueBtreeF64(idx), UniqueBtreeF64(other)) => idx.can_merge(other, ignore), + (UniqueBtreeString(idx), UniqueBtreeString(other)) => idx.can_merge(other, ignore), + (UniqueBtreeAV(idx), UniqueBtreeAV(other)) => idx.can_merge(other, ignore), (UniqueDirectU8(idx), UniqueDirectU8(other)) => idx.can_merge(other, ignore), (UniqueDirectSumTag(idx), UniqueDirectSumTag(other)) => idx.can_merge(other, ignore), (UniqueDirectU16(idx), UniqueDirectU16(other)) => idx.can_merge(other, ignore), @@ -1343,8 +1239,6 @@ impl TableIndex { /// rather than constructing a new `TableIndex`. pub fn clear(&mut self) { self.idx.clear(); - self.num_key_bytes = 0; - self.num_rows = 0; } /// The number of unique keys in this index. @@ -1358,7 +1252,7 @@ impl TableIndex { /// /// This method runs in constant time. pub fn num_rows(&self) -> u64 { - self.num_rows + self.idx.num_rows() as u64 } /// The number of bytes stored in keys in this index. 
@@ -1370,7 +1264,7 @@ impl TableIndex { /// /// See the [`KeySize`] trait for more details on how this method computes its result. pub fn num_key_bytes(&self) -> u64 { - self.num_key_bytes + self.idx.num_key_bytes() } } @@ -1483,15 +1377,15 @@ mod test { let row_ref = table.insert(&pool, &mut blob_store, &pv).unwrap().1; let value = get_fields(&cols, &pv); - prop_assert_eq!(index.idx.len(), 0); + prop_assert_eq!(index.num_rows(), 0); prop_assert_eq!(index.contains_any(&value), false); prop_assert_eq!(unsafe { index.check_and_insert(row_ref) }, Ok(())); - prop_assert_eq!(index.idx.len(), 1); + prop_assert_eq!(index.num_rows(), 1); prop_assert_eq!(index.contains_any(&value), true); prop_assert_eq!(index.delete(row_ref).unwrap(), true); - prop_assert_eq!(index.idx.len(), 0); + prop_assert_eq!(index.num_rows(), 0); prop_assert_eq!(index.contains_any(&value), false); } @@ -1505,7 +1399,7 @@ mod test { let value = get_fields(&cols, &pv); // Nothing in the index yet. - prop_assert_eq!(index.idx.len(), 0); + prop_assert_eq!(index.num_rows(), 0); prop_assert_eq!(violates_unique_constraint(&index, &cols, &pv), false); prop_assert_eq!( get_rows_that_violate_unique_constraint(&index, &value).unwrap().collect::>(), @@ -1517,7 +1411,7 @@ mod test { prop_assert_eq!(unsafe { index.check_and_insert(row_ref) }, Ok(())); // Inserting again would be a problem. 
- prop_assert_eq!(index.idx.len(), 1); + prop_assert_eq!(index.num_rows(), 1); prop_assert_eq!(violates_unique_constraint(&index, &cols, &pv), true); prop_assert_eq!( get_rows_that_violate_unique_constraint(&index, &value).unwrap().collect::>(), diff --git a/crates/table/src/table_index/multimap.rs b/crates/table/src/table_index/multimap.rs index 3da38a6f8fe..9fe3f423814 100644 --- a/crates/table/src/table_index/multimap.rs +++ b/crates/table/src/table_index/multimap.rs @@ -1,108 +1,145 @@ use super::same_key_entry::{same_key_iter, SameKeyEntry, SameKeyEntryIter}; -use core::{hash::Hash, ops::RangeBounds}; +use super::{key_size::KeyBytesStorage, Index, KeySize, RangedIndex}; +use crate::indexes::RowPointer; +use core::ops::RangeBounds; use spacetimedb_sats::memory_usage::MemoryUsage; use std::collections::btree_map::{BTreeMap, Range}; -/// A multi map that relates a `K` to a *set* of `V`s. +/// A multi map that relates a `K` to a *set* of `RowPointer`s. #[derive(Debug, PartialEq, Eq)] -pub struct MultiMap { +pub struct MultiMap { /// The map is backed by a `BTreeMap` for relating keys to values. /// /// A value set is stored as a `SmallVec`. /// This is an optimization over a `Vec<_>` /// as we allow a single element to be stored inline /// to improve performance for the common case of one element. - map: BTreeMap>, + map: BTreeMap, + /// The memoized number of rows indexed in `self.map`. + num_rows: usize, + /// Storage for [`Index::num_key_bytes`]. 
+ num_key_bytes: u64, } -impl Default for MultiMap { +impl Default for MultiMap { fn default() -> Self { - Self { map: BTreeMap::new() } + Self { + map: <_>::default(), + num_rows: <_>::default(), + num_key_bytes: <_>::default(), + } } } -impl MemoryUsage for MultiMap { +impl MemoryUsage for MultiMap { fn heap_usage(&self) -> usize { - let Self { map } = self; - map.heap_usage() + let Self { + map, + num_rows, + num_key_bytes, + } = self; + map.heap_usage() + num_rows.heap_usage() + num_key_bytes.heap_usage() } } -impl MultiMap { - /// Inserts the relation `key -> val` to this multimap. +impl Index for MultiMap { + type Key = K; + + fn clone_structure(&self) -> Self { + <_>::default() + } + + /// Inserts the relation `key -> ptr` to this multimap. /// - /// The map does not check whether `key -> val` was already in the map. - /// It's assumed that the same `val` is never added twice, - /// and multimaps do not bind one `key` to the same `val`. - pub fn insert(&mut self, key: K, val: V) { - self.map.entry(key).or_default().push(val); + /// The map does not check whether `key -> ptr` was already in the map. + /// It's assumed that the same `ptr` is never added twice, + /// and multimaps do not bind one `key` to the same `ptr`. + fn insert(&mut self, key: Self::Key, ptr: RowPointer) -> Result<(), RowPointer> { + self.num_rows += 1; + self.num_key_bytes.add_to_key_bytes::(&key); + self.map.entry(key).or_default().push(ptr); + Ok(()) } - /// Deletes `key -> val` from this multimap. + /// Deletes `key -> ptr` from this multimap. /// - /// Returns whether `key -> val` was present. - pub fn delete(&mut self, key: &K, val: &V) -> bool { + /// Returns whether `key -> ptr` was present. 
+ fn delete(&mut self, key: &K, ptr: RowPointer) -> bool { let Some(vset) = self.map.get_mut(key) else { return false; }; - let (deleted, is_empty) = vset.delete(val); + let (deleted, is_empty) = vset.delete(ptr); if is_empty { self.map.remove(key); } + if deleted { + self.num_rows -= 1; + self.num_key_bytes.sub_from_key_bytes::(key); + } + deleted } - /// Returns an iterator over the multimap that yields all the `V`s - /// of the `K`s that fall within the specified `range`. - pub fn values_in_range(&self, range: &impl RangeBounds) -> MultiMapRangeIter<'_, K, V> { - MultiMapRangeIter { - outer: self.map.range((range.start_bound(), range.end_bound())), - inner: SameKeyEntry::empty_iter(), - } - } + type PointIter<'a> + = SameKeyEntryIter<'a> + where + Self: 'a; - /// Returns an iterator over the multimap that yields all the `V`s of the `key: &K`. - pub fn values_in_point(&self, key: &K) -> SameKeyEntryIter<'_, V> { + fn seek_point(&self, key: &Self::Key) -> Self::PointIter<'_> { same_key_iter(self.map.get(key)) } - /// Returns the number of unique keys in the multimap. - pub fn num_keys(&self) -> usize { + fn num_keys(&self) -> usize { self.map.len() } - /// Returns the total number of entries in the multimap. - #[allow(unused)] // No use for this currently. - pub fn len(&self) -> usize { - self.map.values().map(|vals: &SameKeyEntry| vals.len()).sum() - } - - /// Returns whether there are any entries in the multimap. - #[allow(unused)] // No use for this currently. - pub fn is_empty(&self) -> bool { - self.len() == 0 + fn num_rows(&self) -> usize { + self.num_rows } /// Deletes all entries from the multimap, leaving it empty. /// This will not deallocate the outer map. - pub fn clear(&mut self) { + fn clear(&mut self) { self.map.clear(); + self.num_rows = 0; + self.num_key_bytes.reset_to_zero(); + } + + fn can_merge(&self, _: &Self, _: impl Fn(&RowPointer) -> bool) -> Result<(), RowPointer> { + // `self.insert` always returns `Ok(_)`. 
+ Ok(()) + } +} + +impl RangedIndex for MultiMap { + type RangeIter<'a> + = MultiMapRangeIter<'a, K> + where + Self: 'a; + + /// Returns an iterator over the multimap that yields all the `RowPointer`s + /// of the `K`s that fall within the specified `range`. + fn seek_range(&self, range: &impl RangeBounds) -> Self::RangeIter<'_> { + MultiMapRangeIter { + outer: self.map.range((range.start_bound(), range.end_bound())), + inner: SameKeyEntry::empty_iter(), + } + } } /// An iterator over values in a [`MultiMap`] where the keys are in a certain range. -pub struct MultiMapRangeIter<'a, K, V: Eq + Hash> { +pub struct MultiMapRangeIter<'a, K> { /// The outer iterator seeking for matching keys in the range. - outer: Range<'a, K, SameKeyEntry>, + outer: Range<'a, K, SameKeyEntry>, /// The inner iterator for the value set for a found key. - inner: SameKeyEntryIter<'a, V>, + inner: SameKeyEntryIter<'a>, } -impl<'a, K, V: Eq + Hash> Iterator for MultiMapRangeIter<'a, K, V> { - type Item = &'a V; +impl Iterator for MultiMapRangeIter<'_, K> { + type Item = RowPointer; fn next(&mut self) -> Option { loop { @@ -110,7 +147,6 @@ impl<'a, K, V: Eq + Hash> Iterator for MultiMapRangeIter<'a, K, V> { if let Some(val) = self.inner.next() { return Some(val); } - // Advance and get a new inner, if possible, or quit. // We'll come back and yield elements from it in the next iteration. 
let inner = self.outer.next().map(|(_, i)| i)?; diff --git a/crates/table/src/table_index/same_key_entry.rs b/crates/table/src/table_index/same_key_entry.rs index 3e4037f94c2..6ed37607fd9 100644 --- a/crates/table/src/table_index/same_key_entry.rs +++ b/crates/table/src/table_index/same_key_entry.rs @@ -1,5 +1,4 @@ use crate::{indexes::RowPointer, static_assert_size}; -use core::hash::Hash; use core::slice; use smallvec::SmallVec; use spacetimedb_data_structures::map::{hash_set, HashCollectionExt, HashSet}; @@ -14,7 +13,7 @@ use spacetimedb_memory_usage::MemoryUsage; /// that deals with a smaller number of values in the first variant /// and with a larger number in the second variant. #[derive(Debug, PartialEq, Eq)] -pub(super) enum SameKeyEntry { +pub(super) enum SameKeyEntry { /// A small number of values. /// /// No ordering is kept between values. @@ -29,7 +28,7 @@ pub(super) enum SameKeyEntry { /// Up to two values are represented inline here. /// It's not profitable to represent this as a separate variant /// as that would increase `size_of::()` by 8 bytes. - Small(SmallVec<[V; 2]>), + Small(SmallVec<[RowPointer; 2]>), /// A large number of values. /// @@ -39,18 +38,18 @@ pub(super) enum SameKeyEntry { /// Note that using a `HashSet`, with `S = RandomState`, /// entails that the iteration order is not deterministic. /// This is observed when doing queries against the index. - Large(HashSet), + Large(HashSet), } -static_assert_size!(SameKeyEntry, 32); +static_assert_size!(SameKeyEntry, 32); -impl Default for SameKeyEntry { +impl Default for SameKeyEntry { fn default() -> Self { Self::Small(<_>::default()) } } -impl MemoryUsage for SameKeyEntry { +impl MemoryUsage for SameKeyEntry { fn heap_usage(&self) -> usize { match self { Self::Small(x) => x.heap_usage(), @@ -59,17 +58,17 @@ impl MemoryUsage for SameKeyEntry { } } -impl SameKeyEntry { +impl SameKeyEntry { /// The number of elements /// beyond which the strategy is changed from small to large storage. 
- const LARGE_AFTER_LEN: usize = 4096 / size_of::(); + const LARGE_AFTER_LEN: usize = 4096 / size_of::(); /// Pushes `val` as an entry for the key. /// /// This assumes that `val` was previously not recorded. /// The structure does not check whether it was previously resident. /// As a consequence, the time complexity is `O(k)` amortized. - pub(super) fn push(&mut self, val: V) { + pub(super) fn push(&mut self, val: RowPointer) { match self { Self::Small(list) if list.len() <= Self::LARGE_AFTER_LEN => { list.push(val); @@ -93,11 +92,11 @@ impl SameKeyEntry { /// Deletes `val` as an entry for the key. /// /// Returns `(was_deleted, is_empty)`. - pub(super) fn delete(&mut self, val: &V) -> (bool, bool) { + pub(super) fn delete(&mut self, val: RowPointer) -> (bool, bool) { match self { Self::Small(list) => { // The `list` is not sorted, so we have to do a linear scan first. - if let Some(idx) = list.iter().position(|v| v == val) { + if let Some(idx) = list.iter().position(|v| *v == val) { list.swap_remove(idx); (true, list.is_empty()) } else { @@ -105,7 +104,7 @@ impl SameKeyEntry { } } Self::Large(set) => { - let removed = set.remove(val); + let removed = set.remove(&val); let empty = set.is_empty(); (removed, empty) } @@ -113,7 +112,7 @@ impl SameKeyEntry { } /// Returns an iterator over all the entries for this key. - pub(super) fn iter(&self) -> SameKeyEntryIter<'_, V> { + pub(super) fn iter(&self) -> SameKeyEntryIter<'_> { match self { Self::Small(list) => SameKeyEntryIter::Small(list.iter()), Self::Large(set) => SameKeyEntryIter::Large(set.iter().into()), @@ -121,22 +120,14 @@ impl SameKeyEntry { } /// Returns an iterator over no entries. - pub(super) fn empty_iter<'a>() -> SameKeyEntryIter<'a, V> { + pub(super) fn empty_iter<'a>() -> SameKeyEntryIter<'a> { SameKeyEntryIter::Small(const { &[] }.iter()) } - - /// Returns the number of entries for the same key. 
- pub(super) fn len(&self) -> usize { - match self { - Self::Small(list) => list.len(), - Self::Large(set) => set.len(), - } - } } /// Returns an iterator for a key's entries `ske`. /// This efficiently handles the case where there's no key (`None`). -pub(super) fn same_key_iter(ske: Option<&SameKeyEntry>) -> SameKeyEntryIter<'_, V> { +pub(super) fn same_key_iter(ske: Option<&SameKeyEntry>) -> SameKeyEntryIter<'_> { match ske { None => SameKeyEntry::empty_iter(), Some(ske) => ske.iter(), @@ -144,22 +135,23 @@ pub(super) fn same_key_iter(ske: Option<&SameKeyEntry>) -> Same } /// An iterator over values in a [`SameKeyEntry`]. -pub enum SameKeyEntryIter<'a, V> { - Small(slice::Iter<'a, V>), +pub enum SameKeyEntryIter<'a> { + Small(slice::Iter<'a, RowPointer>), /// This variant doesn't occur so much /// and we'd like to reduce the footprint of `SameKeyEntryIter`. - Large(Box>), + Large(Box>), } -static_assert_size!(SameKeyEntryIter, 16); +static_assert_size!(SameKeyEntryIter, 16); -impl<'a, V> Iterator for SameKeyEntryIter<'a, V> { - type Item = &'a V; +impl Iterator for SameKeyEntryIter<'_> { + type Item = RowPointer; fn next(&mut self) -> Option { match self { Self::Small(list) => list.next(), Self::Large(set) => set.next(), } + .copied() } } diff --git a/crates/table/src/table_index/unique_direct_fixed_cap_index.rs b/crates/table/src/table_index/unique_direct_fixed_cap_index.rs index 941a272e3de..7b076ef11ad 100644 --- a/crates/table/src/table_index/unique_direct_fixed_cap_index.rs +++ b/crates/table/src/table_index/unique_direct_fixed_cap_index.rs @@ -1,3 +1,4 @@ +use super::index::{Index, RangedIndex}; use super::unique_direct_index::{UniqueDirectIndexPointIter, NONE_PTR}; use crate::indexes::RowPointer; use core::mem; @@ -33,9 +34,13 @@ impl UniqueDirectFixedCapIndex { array: vec![NONE_PTR; cap].into(), } } +} + +impl Index for UniqueDirectFixedCapIndex { + type Key = u8; /// Clones the structure of the index and returns one with the same capacity. 
- pub fn clone_structure(&self) -> Self { + fn clone_structure(&self) -> Self { Self::new(self.array.len()) } @@ -45,9 +50,9 @@ impl UniqueDirectFixedCapIndex { /// Returns the existing associated value instead. /// /// Panics if the key is beyond the fixed capacity of this index. - pub fn insert(&mut self, key: usize, val: RowPointer) -> Result<(), RowPointer> { + fn insert(&mut self, key: Self::Key, val: RowPointer) -> Result<(), RowPointer> { // Fetch the slot. - let slot = &mut self.array[key]; + let slot = &mut self.array[key as usize]; let in_slot = *slot; if in_slot == NONE_PTR { // We have `NONE_PTR`, so not set yet. @@ -59,11 +64,8 @@ impl UniqueDirectFixedCapIndex { } } - /// Deletes `key` from this map. - /// - /// Returns whether `key` was present. - pub fn delete(&mut self, key: usize) -> bool { - let Some(slot) = self.array.get_mut(key) else { + fn delete(&mut self, &key: &Self::Key, _: RowPointer) -> bool { + let Some(slot) = self.array.get_mut(key as usize) else { return false; }; let old_val = mem::replace(slot, NONE_PTR); @@ -72,60 +74,26 @@ impl UniqueDirectFixedCapIndex { deleted } - /// Returns an iterator yielding the potential [`RowPointer`] for `key`. - pub fn seek_point(&self, key: usize) -> UniqueDirectIndexPointIter { - let point = self.array.get(key).copied().filter(|slot| *slot != NONE_PTR); - UniqueDirectIndexPointIter::new(point) - } + type PointIter<'a> + = UniqueDirectIndexPointIter + where + Self: 'a; - /// Returns an iterator yielding all the [`RowPointer`] that correspond to the provided `range`. - pub fn seek_range(&self, range: &impl RangeBounds) -> UniqueDirectFixedCapIndexRangeIter<'_> { - // Translate `range` to `start..end`. 
- let end = match range.end_bound() { - Bound::Included(&e) => e + 1, - Bound::Excluded(&e) => e, - Bound::Unbounded => self.array.len(), - }; - let start = match range.start_bound() { - Bound::Included(&s) => s, - Bound::Excluded(&s) => s + 1, - Bound::Unbounded => 0, - }; - - // Normalize `start` so that `start <= end`. - let start = start.min(end); - - // Make the iterator. - UniqueDirectFixedCapIndexRangeIter::new(self.array.get(start..end).unwrap_or_default()) - } - - /// Returns the number of unique keys in the index. - pub fn num_keys(&self) -> usize { - self.len + fn seek_point(&self, &key: &Self::Key) -> Self::PointIter<'_> { + let point = self.array.get(key as usize).copied().filter(|slot| *slot != NONE_PTR); + UniqueDirectIndexPointIter::new(point) } - /// Returns the total number of entries in the index. - pub fn len(&self) -> usize { + fn num_keys(&self) -> usize { self.len } - /// Returns whether there are any entries in the index. - #[allow(unused)] // No use for this currently. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Deletes all entries from the index, leaving it empty. - pub fn clear(&mut self) { + fn clear(&mut self) { self.array.fill(NONE_PTR); self.len = 0; } - /// Returns whether `other` can be merged into `self` - /// with an error containing the element in `self` that caused the violation. - /// - /// The closure `ignore` indicates whether a row in `self` should be ignored. 
- pub(crate) fn can_merge(&self, other: &Self, ignore: impl Fn(&RowPointer) -> bool) -> Result<(), RowPointer> { + fn can_merge(&self, other: &Self, ignore: impl Fn(&RowPointer) -> bool) -> Result<(), RowPointer> { for (slot_s, slot_o) in self.array.iter().zip(other.array.iter()) { let ptr_s = slot_s.with_reserved_bit(false); if *slot_s != NONE_PTR && *slot_o != NONE_PTR && !ignore(&ptr_s) { @@ -137,6 +105,33 @@ impl UniqueDirectFixedCapIndex { } } +impl RangedIndex for UniqueDirectFixedCapIndex { + type RangeIter<'a> + = UniqueDirectFixedCapIndexRangeIter<'a> + where + Self: 'a; + + fn seek_range(&self, range: &impl RangeBounds) -> Self::RangeIter<'_> { + // Translate `range` to `start..end`. + let end = match range.end_bound() { + Bound::Included(&e) => e as usize + 1, + Bound::Excluded(&e) => e as usize, + Bound::Unbounded => self.array.len(), + }; + let start = match range.start_bound() { + Bound::Included(&s) => s as usize, + Bound::Excluded(&s) => s as usize + 1, + Bound::Unbounded => 0, + }; + + // Normalize `start` so that `start <= end`. + let start = start.min(end); + + // Make the iterator. + UniqueDirectFixedCapIndexRangeIter::new(self.array.get(start..end).unwrap_or_default()) + } +} + /// An iterator over a range of keys in a [`UniqueDirectFixedCapIndex`]. 
#[derive(Debug)] pub struct UniqueDirectFixedCapIndexRangeIter<'a> { @@ -167,13 +162,13 @@ mod test { use core::ops::Range; use proptest::prelude::*; - fn range(start: u8, end: u8) -> Range { + fn range(start: u8, end: u8) -> Range { let min = start.min(end); let max = start.max(end); - min as usize..max as usize + min..max } - fn setup(start: u8, end: u8) -> (UniqueDirectFixedCapIndex, Range, Vec) { + fn setup(start: u8, end: u8) -> (UniqueDirectFixedCapIndex, Range, Vec) { let range = range(start, end); let (keys, ptrs): (Vec<_>, Vec<_>) = range.clone().zip(gen_row_pointers()).unzip(); @@ -181,7 +176,7 @@ mod test { for (key, ptr) in keys.iter().zip(&ptrs) { index.insert(*key, *ptr).unwrap(); } - assert_eq!(index.len(), range.end - range.start); + assert_eq!(index.num_rows(), (range.end - range.start) as usize); (index, range, ptrs) } @@ -209,15 +204,15 @@ mod test { return Err(TestCaseError::Reject("empty range".into())); } - let key = (key as usize).clamp(range.start, range.end.saturating_sub(1)); + let key = key.clamp(range.start, range.end.saturating_sub(1)); - let ptr = index.seek_point(key).next().unwrap(); - assert!(index.delete(key)); - assert!(!index.delete(key)); - assert_eq!(index.len(), range.end - range.start - 1); + let ptr = index.seek_point(&key).next().unwrap(); + assert!(index.delete(&key, ptr)); + assert!(!index.delete(&key, ptr)); + assert_eq!(index.num_rows(), (range.end - range.start - 1) as usize); index.insert(key, ptr).unwrap(); - assert_eq!(index.len(), range.end - range.start); + assert_eq!(index.num_rows(), (range.end - range.start) as usize); } } } diff --git a/crates/table/src/table_index/unique_direct_index.rs b/crates/table/src/table_index/unique_direct_index.rs index 53b8805dcc0..08cb21d4ab3 100644 --- a/crates/table/src/table_index/unique_direct_index.rs +++ b/crates/table/src/table_index/unique_direct_index.rs @@ -1,4 +1,6 @@ +use super::index::Index; use crate::indexes::{PageIndex, PageOffset, RowPointer, SquashedOffset}; +use 
crate::table_index::index::RangedIndex; use core::mem; use core::ops::{Bound, RangeBounds}; use core::option::IntoIter; @@ -92,12 +94,14 @@ impl InnerIndex { } } -impl UniqueDirectIndex { - /// Inserts the relation `key -> val` to this index. - /// - /// If `key` was already present in the index, does not add an association with `val`. - /// Returns the existing associated value instead. - pub fn insert(&mut self, key: usize, val: RowPointer) -> Result<(), RowPointer> { +impl Index for UniqueDirectIndex { + type Key = usize; + + fn clone_structure(&self) -> Self { + Self::default() + } + + fn insert(&mut self, key: Self::Key, val: RowPointer) -> Result<(), RowPointer> { let (key_outer, key_inner) = split_key(key); // Fetch the outer index and ensure it can house `key_outer`. @@ -122,10 +126,7 @@ impl UniqueDirectIndex { } } - /// Deletes `key` from this map. - /// - /// Returns whether `key` was present. - pub fn delete(&mut self, key: usize) -> bool { + fn delete(&mut self, &key: &Self::Key, _: RowPointer) -> bool { let (key_outer, key_inner) = split_key(key); let outer = &mut self.outer; if let Some(Some(inner)) = outer.get_mut(key_outer) { @@ -138,8 +139,12 @@ impl UniqueDirectIndex { false } - /// Returns an iterator yielding the potential [`RowPointer`] for `key`. - pub fn seek_point(&self, key: usize) -> UniqueDirectIndexPointIter { + type PointIter<'a> + = UniqueDirectIndexPointIter + where + Self: 'a; + + fn seek_point(&self, &key: &Self::Key) -> Self::PointIter<'_> { let (outer_key, inner_key) = split_key(key); let point = self .outer @@ -150,8 +155,45 @@ impl UniqueDirectIndex { UniqueDirectIndexPointIter::new(point) } + fn num_keys(&self) -> usize { + self.len + } + + /// Deletes all entries from the index, leaving it empty. + /// This will not deallocate the outer index. 
+ fn clear(&mut self) { + self.outer.clear(); + self.len = 0; + } + + /// Returns whether `other` can be merged into `self` + /// with an error containing the element in `self` that caused the violation. + /// + /// The closure `ignore` indicates whether a row in `self` should be ignored. + fn can_merge(&self, other: &Self, ignore: impl Fn(&RowPointer) -> bool) -> Result<(), RowPointer> { + for (inner_s, inner_o) in self.outer.iter().zip(&other.outer) { + let (Some(inner_s), Some(inner_o)) = (inner_s, inner_o) else { + continue; + }; + + for (slot_s, slot_o) in inner_s.inner.iter().zip(inner_o.inner.iter()) { + let ptr_s = slot_s.with_reserved_bit(false); + if *slot_s != NONE_PTR && *slot_o != NONE_PTR && !ignore(&ptr_s) { + // For the same key, we found both slots occupied, so we cannot merge. + return Err(ptr_s); + } + } + } + + Ok(()) + } +} + +impl RangedIndex for UniqueDirectIndex { + type RangeIter<'a> = UniqueDirectIndexRangeIter<'a>; + /// Returns an iterator yielding all the [`RowPointer`] that correspond to the provided `range`. - pub fn seek_range(&self, range: &impl RangeBounds) -> UniqueDirectIndexRangeIter<'_> { + fn seek_range(&self, range: &impl RangeBounds) -> Self::RangeIter<'_> { // The upper bound of possible key. // This isn't necessarily the real max key actually present in the index, // due to possible deletions. @@ -181,51 +223,6 @@ impl UniqueDirectIndex { end, } } - - /// Returns the number of unique keys in the index. - pub fn num_keys(&self) -> usize { - self.len - } - - /// Returns the total number of entries in the index. - pub fn len(&self) -> usize { - self.len - } - - /// Returns whether there are any entries in the index. - #[allow(unused)] // No use for this currently. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Deletes all entries from the index, leaving it empty. - /// This will not deallocate the outer index. 
- pub fn clear(&mut self) { - self.outer.clear(); - self.len = 0; - } - - /// Returns whether `other` can be merged into `self` - /// with an error containing the element in `self` that caused the violation. - /// - /// The closure `ignore` indicates whether a row in `self` should be ignored. - pub(crate) fn can_merge(&self, other: &Self, ignore: impl Fn(&RowPointer) -> bool) -> Result<(), RowPointer> { - for (inner_s, inner_o) in self.outer.iter().zip(&other.outer) { - let (Some(inner_s), Some(inner_o)) = (inner_s, inner_o) else { - continue; - }; - - for (slot_s, slot_o) in inner_s.inner.iter().zip(inner_o.inner.iter()) { - let ptr_s = slot_s.with_reserved_bit(false); - if *slot_s != NONE_PTR && *slot_o != NONE_PTR && !ignore(&ptr_s) { - // For the same key, we found both slots occupied, so we cannot merge. - return Err(ptr_s); - } - } - } - - Ok(()) - } } /// An iterator over the potential value in a [`UniqueDirectMap`] for a given key. @@ -324,7 +321,7 @@ pub(super) mod test { for (key, ptr) in keys.iter().zip(&ptrs) { index.insert(*key, *ptr).unwrap(); } - assert_eq!(index.len(), 4); + assert_eq!(index.num_rows(), 4); let ptrs_found = index.seek_range(&range).collect::>(); assert_eq!(ptrs, ptrs_found); @@ -354,15 +351,15 @@ pub(super) mod test { for (key, ptr) in keys.iter().zip(&ptrs) { index.insert(*key, *ptr).unwrap(); } - assert_eq!(index.len(), 4); + assert_eq!(index.num_rows(), 4); let key = KEYS_PER_INNER + 1; - let ptr = index.seek_point(key).next().unwrap(); - assert!(index.delete(key)); - assert!(!index.delete(key)); - assert_eq!(index.len(), 3); + let ptr = index.seek_point(&key).next().unwrap(); + assert!(index.delete(&key, ptr)); + assert!(!index.delete(&key, ptr)); + assert_eq!(index.num_rows(), 3); index.insert(key, ptr).unwrap(); - assert_eq!(index.len(), 4); + assert_eq!(index.num_rows(), 4); } } diff --git a/crates/table/src/table_index/uniquemap.rs b/crates/table/src/table_index/uniquemap.rs index d21fcf5bb54..ae72f0a560f 100644 --- 
a/crates/table/src/table_index/uniquemap.rs +++ b/crates/table/src/table_index/uniquemap.rs @@ -1,92 +1,89 @@ +use super::{Index, KeySize, RangedIndex}; +use crate::{indexes::RowPointer, table_index::key_size::KeyBytesStorage}; use core::{ops::RangeBounds, option::IntoIter}; use spacetimedb_sats::memory_usage::MemoryUsage; use std::collections::btree_map::{BTreeMap, Entry, Range}; -/// A "unique map" that relates a `K` to a `V`. +/// A "unique map" that relates a `K` to a `RowPointer`. /// -/// (This is just a `BTreeMap`) with a slightly modified interface. +/// (This is just a `BTreeMap`) with a slightly modified interface. #[derive(Debug, PartialEq, Eq, Clone)] -pub struct UniqueMap { +pub struct UniqueMap { /// The map is backed by a `BTreeMap` for relating a key to a value. - map: BTreeMap, + map: BTreeMap, + /// Storage for [`Index::num_key_bytes`]. + num_key_bytes: K::MemoStorage, } -impl Default for UniqueMap { +impl Default for UniqueMap { fn default() -> Self { - Self { map: BTreeMap::new() } + Self { + map: <_>::default(), + num_key_bytes: <_>::default(), + } } } -impl MemoryUsage for UniqueMap { +impl MemoryUsage for UniqueMap { fn heap_usage(&self) -> usize { - let Self { map } = self; - map.heap_usage() + let Self { map, num_key_bytes } = self; + map.heap_usage() + num_key_bytes.heap_usage() } } -impl UniqueMap { - /// Inserts the relation `key -> val` to this map. - /// - /// If `key` was already present in the map, does not add an association with `val`. - /// Returns the existing associated value instead. 
- pub fn insert(&mut self, key: K, val: V) -> Result<(), &V> { +impl Index for UniqueMap { + type Key = K; + + fn clone_structure(&self) -> Self { + Self::default() + } + + fn insert(&mut self, key: K, val: RowPointer) -> Result<(), RowPointer> { match self.map.entry(key) { Entry::Vacant(e) => { + self.num_key_bytes.add_to_key_bytes::(e.key()); e.insert(val); Ok(()) } - Entry::Occupied(e) => Err(e.into_mut()), + Entry::Occupied(e) => Err(*e.into_mut()), } } - /// Deletes `key` from this map. - /// - /// Returns whether `key` was present. - pub fn delete(&mut self, key: &K) -> bool { - self.map.remove(key).is_some() - } - - /// Returns an iterator over the map that yields all the `V`s - /// of the `K`s that fall within the specified `range`. - pub fn values_in_range(&self, range: &impl RangeBounds) -> UniqueMapRangeIter<'_, K, V> { - UniqueMapRangeIter { - iter: self.map.range((range.start_bound(), range.end_bound())), + fn delete(&mut self, key: &K, _: RowPointer) -> bool { + let ret = self.map.remove(key).is_some(); + if ret { + self.num_key_bytes.sub_from_key_bytes::(key); } + ret } - /// Returns an iterator over the map that yields the potential `V` of the `key: &K`. - pub fn values_in_point(&self, key: &K) -> UniqueMapPointIter<'_, V> { - let iter = self.map.get(key).into_iter(); - UniqueMapPointIter { iter } + fn num_keys(&self) -> usize { + self.map.len() } - /// Returns the number of unique keys in the map. - pub fn num_keys(&self) -> usize { - self.len() + fn num_key_bytes(&self) -> u64 { + self.num_key_bytes.get(self) } - /// Returns the total number of entries in the map.s - pub fn len(&self) -> usize { - self.map.len() - } + type PointIter<'a> + = UniqueMapPointIter<'a> + where + Self: 'a; - /// Returns whether there are any entries in the map. - #[allow(unused)] // No use for this currently. 
- pub fn is_empty(&self) -> bool { - self.len() == 0 + fn seek_point(&self, key: &Self::Key) -> Self::PointIter<'_> { + let iter = self.map.get(key).into_iter(); + UniqueMapPointIter { iter } } /// Deletes all entries from the map, leaving it empty. - /// This will not deallocate the outer map. - pub fn clear(&mut self) { + /// + /// Unfortunately, this will drop the existing allocation. + fn clear(&mut self) { self.map.clear(); + self.num_key_bytes.reset_to_zero(); } - /// Returns whether `other` can be merged into `self` - /// with an error containing the element in `self` that caused the violation. - /// - /// The closure `ignore` indicates whether a row in `self` should be ignored. - pub(crate) fn can_merge(&self, other: &Self, ignore: impl Fn(&V) -> bool) -> Result<(), &V> { + fn can_merge(&self, other: &Self, ignore: impl Fn(&RowPointer) -> bool) -> Result<(), RowPointer> { let Some(found) = other .map .keys() @@ -94,34 +91,47 @@ impl UniqueMap { else { return Ok(()); }; - Err(found) + Err(*found) } } /// An iterator over the potential value in a [`UniqueMap`] for a given key. -pub struct UniqueMapPointIter<'a, V> { +pub struct UniqueMapPointIter<'a> { /// The iterator seeking for matching keys in the range. - iter: IntoIter<&'a V>, + iter: IntoIter<&'a RowPointer>, } -impl<'a, V> Iterator for UniqueMapPointIter<'a, V> { - type Item = &'a V; +impl<'a> Iterator for UniqueMapPointIter<'a> { + type Item = RowPointer; fn next(&mut self) -> Option { - self.iter.next() + self.iter.next().copied() + } +} + +impl RangedIndex for UniqueMap { + type RangeIter<'a> + = UniqueMapRangeIter<'a, K> + where + Self: 'a; + + fn seek_range(&self, range: &impl RangeBounds) -> Self::RangeIter<'_> { + UniqueMapRangeIter { + iter: self.map.range((range.start_bound(), range.end_bound())), + } } } /// An iterator over values in a [`UniqueMap`] where the keys are in a certain range. 
-pub struct UniqueMapRangeIter<'a, K, V> { +pub struct UniqueMapRangeIter<'a, K> { /// The iterator seeking for matching keys in the range. - iter: Range<'a, K, V>, + iter: Range<'a, K, RowPointer>, } -impl<'a, K, V> Iterator for UniqueMapRangeIter<'a, K, V> { - type Item = &'a V; +impl<'a, K> Iterator for UniqueMapRangeIter<'a, K> { + type Item = RowPointer; fn next(&mut self) -> Option { - self.iter.next().map(|(_, v)| v) + self.iter.next().map(|(_, v)| *v) } }