From 0cdd587120342429ecead67a958900fba58d388e Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 6 May 2026 14:25:11 +0100 Subject: [PATCH 1/4] fix[patches]: ensure cached patches are updated and use `PatchesData` Signed-off-by: Joe Isaacs --- encodings/alp/src/alp/array.rs | 85 +++------- encodings/alp/src/alp_rd/array.rs | 68 ++------ .../fastlanes/src/bitpacking/array/mod.rs | 41 ++--- .../fastlanes/src/bitpacking/vtable/mod.rs | 50 ++---- encodings/sparse/public-api.lock | 22 ++- encodings/sparse/src/canonical.rs | 5 +- encodings/sparse/src/compute/cast.rs | 2 +- encodings/sparse/src/compute/filter.rs | 1 + encodings/sparse/src/compute/take.rs | 1 + encodings/sparse/src/lib.rs | 157 +++++++++++------- encodings/sparse/src/ops.rs | 1 + encodings/sparse/src/rules.rs | 3 +- encodings/sparse/src/slice.rs | 1 + vortex-array/public-api.lock | 38 +++++ vortex-array/src/patches.rs | 103 ++++++++++++ vortex-btrblocks/src/schemes/float.rs | 1 + vortex-btrblocks/src/schemes/integer.rs | 1 + vortex-btrblocks/src/schemes/string.rs | 1 + 18 files changed, 337 insertions(+), 244 deletions(-) diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index 63549dc24f8..d0a1804863f 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -26,6 +26,7 @@ use vortex_array::buffer::BufferHandle; use vortex_array::dtype::DType; use vortex_array::dtype::PType; use vortex_array::patches::Patches; +use vortex_array::patches::PatchesData; use vortex_array::patches::PatchesMetadata; use vortex_array::require_child; use vortex_array::require_patches; @@ -53,16 +54,13 @@ pub type ALPArray = Array; impl ArrayHash for ALPData { fn array_hash(&self, state: &mut H, _precision: Precision) { self.exponents.hash(state); - self.patch_offset.hash(state); - self.patch_offset_within_chunk.hash(state); + self.patches_data.hash(state); } } impl ArrayEq for ALPData { fn array_eq(&self, other: &Self, _precision: Precision) -> bool { - self.exponents == other.exponents - && self.patch_offset == other.patch_offset - && self.patch_offset_within_chunk == other.patch_offset_within_chunk + self.exponents == other.exponents && self.patches_data == other.patches_data } } @@ -85,18 +83,14 @@ impl VTable for ALP { slots: &[Option], ) -> VortexResult<()> { let slots = ALPSlotsView::from_slots(slots); - validate_parts( - dtype, + let patches = PatchesData::to_optional_patches( + data.patches_data.as_ref(), len, - data.exponents, - slots.encoded, - patches_from_slots( - &slots, - data.patch_offset, - data.patch_offset_within_chunk, - len, - ), - ) + slots.patch_indices, + slots.patch_values, + slots.patch_chunk_offsets, + ); + validate_parts(dtype, len, data.exponents, slots.encoded, patches) } fn nbuffers(_array: ArrayView<'_, Self>) -> usize { @@ -221,16 +215,15 @@ pub struct ALPSlots { #[derive(Clone, Debug)] pub struct ALPData { - patch_offset: Option, - patch_offset_within_chunk: Option, + patches_data: Option, exponents: Exponents, } impl Display for ALPData { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "exponents: {}", self.exponents)?; - if let Some(offset) = self.patch_offset { - write!(f, ", patch_offset: {offset}")?; + if let Some(pd) = &self.patches_data { + write!(f, ", patch_offset: {}", pd.offset())?; } Ok(()) } @@ -339,14 +332,8 @@ impl ALPData { /// See [`ALP::try_new`] for reference on preconditions that must pass before /// calling this method. pub fn new(exponents: Exponents, patches: Option) -> Self { - let (patch_offset, patch_offset_within_chunk) = match &patches { - Some(p) => (Some(p.offset()), p.offset_within_chunk()), - None => (None, None), - }; - Self { - patch_offset, - patch_offset_within_chunk, + patches_data: patches.as_ref().map(PatchesData::from_patches), exponents, } } @@ -411,14 +398,8 @@ impl ALP { impl ALPData { fn make_slots(encoded: &ArrayRef, patches: Option<&Patches>) -> Vec> { - let (patch_indices, patch_values, patch_chunk_offsets) = match patches { - Some(p) => ( - Some(p.indices().clone()), - Some(p.values().clone()), - p.chunk_offsets().clone(), - ), - None => (None, None, None), - }; + let (patch_indices, patch_values, patch_chunk_offsets) = + PatchesData::make_optional_slots(patches); vec![ Some(encoded.clone()), patch_indices, @@ -439,39 +420,17 @@ pub trait ALPArrayExt: ALPArraySlotsExt { } fn patches(&self) -> Option { - patches_from_slots( - &self.slots_view(), - self.patch_offset, - self.patch_offset_within_chunk, + let slots = self.slots_view(); + PatchesData::to_optional_patches( + self.patches_data.as_ref(), self.as_ref().len(), + slots.patch_indices, + slots.patch_values, + slots.patch_chunk_offsets, ) } } -fn patches_from_slots( - slots: &ALPSlotsView, - patch_offset: Option, - patch_offset_within_chunk: Option, - len: usize, -) -> Option { - match (slots.patch_indices, slots.patch_values) { - (Some(indices), Some(values)) => { - let patch_offset = patch_offset.vortex_expect("has patch slots but no patch_offset"); - Some(unsafe { - Patches::new_unchecked( - len, - patch_offset, - indices.clone(), - values.clone(), - slots.patch_chunk_offsets.cloned(), - patch_offset_within_chunk, - ) - }) - } - _ => None, - } -} - fn validate_parts( dtype: &DType, len: usize, diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs index 0c25717db6f..dbd5d649bfd 100644 --- a/encodings/alp/src/alp_rd/array.rs +++ b/encodings/alp/src/alp_rd/array.rs @@ -31,6 +31,7 @@ use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; use vortex_array::patches::Patches; +use vortex_array::patches::PatchesData; use vortex_array::patches::PatchesMetadata; use vortex_array::require_child; use vortex_array::require_patches; @@ -74,8 +75,7 @@ impl ArrayHash for ALPRDData { fn array_hash(&self, state: &mut H, precision: Precision) { self.left_parts_dictionary.array_hash(state, precision); self.right_bit_width.hash(state); - self.patch_offset.hash(state); - self.patch_offset_within_chunk.hash(state); + self.patches_data.hash(state); } } @@ -84,8 +84,7 @@ impl ArrayEq for ALPRDData { self.left_parts_dictionary .array_eq(&other.left_parts_dictionary, precision) && self.right_bit_width == other.right_bit_width - && self.patch_offset == other.patch_offset - && self.patch_offset_within_chunk == other.patch_offset_within_chunk + && self.patches_data == other.patches_data } } @@ -112,13 +111,7 @@ impl VTable for ALPRD { len, left_parts_from_slots(slots), right_parts_from_slots(slots), - patches_from_slots( - slots, - data.patch_offset, - data.patch_offset_within_chunk, - len, - ) - .as_ref(), + patches_from_slots(slots, data.patches_data.as_ref(), len).as_ref(), ) } @@ -343,8 +336,7 @@ pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = [ #[derive(Clone, Debug)] pub struct ALPRDData { - patch_offset: Option, - patch_offset_within_chunk: Option, + patches_data: Option, left_parts_dictionary: Buffer, right_bit_width: u8, } @@ -352,8 +344,8 @@ pub struct ALPRDData { impl Display for ALPRDData { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "right_bit_width: {}", self.right_bit_width)?; - if let Some(offset) = self.patch_offset { - write!(f, ", patch_offset: {offset}")?; + if let Some(pd) = &self.patches_data { + write!(f, ", patch_offset: {}", pd.offset())?; } Ok(()) } @@ -438,14 +430,8 @@ impl ALPRDData { right_bit_width: u8, left_parts_patches: Option, ) -> Self { - let (patch_offset, patch_offset_within_chunk) = match &left_parts_patches { - Some(patches) => (Some(patches.offset()), patches.offset_within_chunk()), - None => (None, None), - }; - Self { - patch_offset, - patch_offset_within_chunk, + patches_data: left_parts_patches.as_ref().map(PatchesData::from_patches), left_parts_dictionary, right_bit_width, } @@ -466,14 +452,7 @@ impl ALPRDData { right_parts: &ArrayRef, patches: Option<&Patches>, ) -> Vec> { - let (pi, pv, pco) = match patches { - Some(p) => ( - Some(p.indices().clone()), - Some(p.values().clone()), - p.chunk_offsets().clone(), - ), - None => (None, None, None), - }; + let (pi, pv, pco) = PatchesData::make_optional_slots(patches); vec![ Some(left_parts.clone()), Some(right_parts.clone()), @@ -519,26 +498,16 @@ fn right_parts_from_slots(slots: &[Option]) -> &ArrayRef { fn patches_from_slots( slots: &[Option], - patch_offset: Option, - patch_offset_within_chunk: Option, + patches_data: Option<&PatchesData>, len: usize, ) -> Option { - match (&slots[LP_PATCH_INDICES_SLOT], &slots[LP_PATCH_VALUES_SLOT]) { - (Some(indices), Some(values)) => { - let patch_offset = patch_offset.vortex_expect("ALPRDArray patch slots without offset"); - Some(unsafe { - Patches::new_unchecked( - len, - patch_offset, - indices.clone(), - values.clone(), - slots[LP_PATCH_CHUNK_OFFSETS_SLOT].clone(), - patch_offset_within_chunk, - ) - }) - } - _ => None, - } + PatchesData::to_optional_patches( + patches_data, + len, + slots[LP_PATCH_INDICES_SLOT].as_ref(), + slots[LP_PATCH_VALUES_SLOT].as_ref(), + slots[LP_PATCH_CHUNK_OFFSETS_SLOT].as_ref(), + ) } fn validate_parts( @@ -626,8 +595,7 @@ pub trait ALPRDArrayExt: TypedArrayRef { fn left_parts_patches(&self) -> Option { patches_from_slots( self.as_ref().slots(), - self.patch_offset, - self.patch_offset_within_chunk, + self.patches_data.as_ref(), self.as_ref().len(), ) } diff --git a/encodings/fastlanes/src/bitpacking/array/mod.rs b/encodings/fastlanes/src/bitpacking/array/mod.rs index 75b75a51e99..f28212842ee 100644 --- a/encodings/fastlanes/src/bitpacking/array/mod.rs +++ b/encodings/fastlanes/src/bitpacking/array/mod.rs @@ -16,9 +16,9 @@ use vortex_array::dtype::DType; use vortex_array::dtype::NativePType; use vortex_array::dtype::PType; use vortex_array::patches::Patches; +use vortex_array::patches::PatchesData; use vortex_array::validity::Validity; use vortex_array::vtable::child_to_validity; -use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_ensure; use vortex_error::vortex_err; @@ -60,10 +60,8 @@ pub struct BitPackedData { pub(super) offset: u16, pub(super) bit_width: u8, pub(super) packed: BufferHandle, - /// The offset metadata from patches, needed to reconstruct Patches from slots. - pub(super) patch_offset: Option, - /// The offset_within_chunk metadata from patches. - pub(super) patch_offset_within_chunk: Option, + /// Patch metadata for reconstructing Patches from slots. + pub(super) patches_data: Option, } impl Display for BitPackedData { @@ -126,17 +124,11 @@ impl BitPackedData { "Offset must be less than the full block i.e., 1024, got {offset}" ); - let (patch_offset, patch_offset_within_chunk) = match &patches { - Some(p) => (Some(p.offset()), p.offset_within_chunk()), - None => (None, None), - }; - Ok(Self { offset, bit_width, packed, - patch_offset, - patch_offset_within_chunk, + patches_data: patches.as_ref().map(PatchesData::from_patches), }) } @@ -294,24 +286,13 @@ pub trait BitPackedArrayExt: BitPackedArraySlotsExt { #[inline] fn patches(&self) -> Option { - match (self.patch_indices(), self.patch_values()) { - (Some(indices), Some(values)) => { - let patch_offset = self - .patch_offset - .vortex_expect("has patch slots but no patch_offset"); - Some(unsafe { - Patches::new_unchecked( - self.as_ref().len(), - patch_offset, - indices.clone(), - values.clone(), - self.patch_chunk_offsets().cloned(), - self.patch_offset_within_chunk, - ) - }) - } - _ => None, - } + PatchesData::to_optional_patches( + self.patches_data.as_ref(), + self.as_ref().len(), + self.patch_indices(), + self.patch_values(), + self.patch_chunk_offsets(), + ) } #[inline] diff --git a/encodings/fastlanes/src/bitpacking/vtable/mod.rs b/encodings/fastlanes/src/bitpacking/vtable/mod.rs index c11a6c4ec51..64489f22401 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/mod.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/mod.rs @@ -22,6 +22,7 @@ use vortex_array::dtype::DType; use vortex_array::dtype::PType; use vortex_array::match_each_integer_ptype; use vortex_array::patches::Patches; +use vortex_array::patches::PatchesData; use vortex_array::patches::PatchesMetadata; use vortex_array::require_patches; use vortex_array::require_validity; @@ -70,8 +71,7 @@ impl ArrayHash for BitPackedData { self.offset.hash(state); self.bit_width.hash(state); self.packed.array_hash(state, precision); - self.patch_offset.hash(state); - self.patch_offset_within_chunk.hash(state); + self.patches_data.hash(state); } } @@ -80,8 +80,7 @@ impl ArrayEq for BitPackedData { self.offset == other.offset && self.bit_width == other.bit_width && self.packed.array_eq(&other.packed, precision) - && self.patch_offset == other.patch_offset - && self.patch_offset_within_chunk == other.patch_offset_within_chunk + && self.patches_data == other.patches_data } } @@ -106,24 +105,13 @@ impl VTable for BitPacked { let slots = BitPackedSlotsView::from_slots(slots); let validity = child_to_validity(slots.validity_child, dtype.nullability()); - let patches = match (slots.patch_indices, slots.patch_values) { - (Some(indices), Some(values)) => { - let patch_offset = data - .patch_offset - .vortex_expect("has patch slots but no patch_offset"); - Some(unsafe { - Patches::new_unchecked( - len, - patch_offset, - indices.clone(), - values.clone(), - slots.patch_chunk_offsets.cloned(), - data.patch_offset_within_chunk, - ) - }) - } - _ => None, - }; + let patches = PatchesData::to_optional_patches( + data.patches_data.as_ref(), + len, + slots.patch_indices, + slots.patch_values, + slots.patch_chunk_offsets, + ); BitPackedData::validate( &data.packed, dtype.as_ptype(), @@ -224,14 +212,7 @@ impl VTable for BitPacked { .transpose()?; let slots = { - let (pi, pv, pco) = match &patches { - Some(p) => ( - Some(p.indices().clone()), - Some(p.values().clone()), - p.chunk_offsets().clone(), - ), - None => (None, None, None), - }; + let (pi, pv, pco) = PatchesData::make_optional_slots(patches.as_ref()); let validity_slot = validity_to_child(&validity, len); vec![pi, pv, pco, validity_slot] }; @@ -322,14 +303,7 @@ impl BitPacked { ) -> VortexResult { let dtype = DType::Primitive(ptype, validity.nullability()); let slots = { - let (pi, pv, pco) = match &patches { - Some(p) => ( - Some(p.indices().clone()), - Some(p.values().clone()), - p.chunk_offsets().clone(), - ), - None => (None, None, None), - }; + let (pi, pv, pco) = PatchesData::make_optional_slots(patches.as_ref()); let validity_slot = validity_to_child(&validity, len); vec![pi, pv, pco, validity_slot] }; diff --git a/encodings/sparse/public-api.lock b/encodings/sparse/public-api.lock index c8e9abb533a..d1f54401c82 100644 --- a/encodings/sparse/public-api.lock +++ b/encodings/sparse/public-api.lock @@ -90,9 +90,7 @@ pub fn vortex_sparse::SparseData::is_empty(&self) -> bool pub fn vortex_sparse::SparseData::len(&self) -> usize -pub fn vortex_sparse::SparseData::patches(&self) -> &vortex_array::patches::Patches - -pub fn vortex_sparse::SparseData::resolved_patches(&self) -> vortex_error::VortexResult +pub fn vortex_sparse::SparseData::offset(&self) -> usize pub fn vortex_sparse::SparseData::try_new_from_patches(vortex_array::patches::Patches, vortex_array::scalar::Scalar) -> vortex_error::VortexResult @@ -138,4 +136,22 @@ pub fn vortex_sparse::SparseMetadata::clear(&mut self) pub fn vortex_sparse::SparseMetadata::encoded_len(&self) -> usize +pub trait vortex_sparse::SparseExt + +pub fn vortex_sparse::SparseExt::patches(&self) -> vortex_array::patches::Patches + +pub fn vortex_sparse::SparseExt::resolved_patches(&self) -> vortex_error::VortexResult + +impl vortex_sparse::SparseExt for vortex_array::array::typed::Array + +pub fn vortex_array::array::typed::Array::patches(&self) -> vortex_array::patches::Patches + +pub fn vortex_array::array::typed::Array::resolved_patches(&self) -> vortex_error::VortexResult + +impl vortex_sparse::SparseExt for vortex_array::array::view::ArrayView<'_, vortex_sparse::Sparse> + +pub fn vortex_array::array::view::ArrayView<'_, vortex_sparse::Sparse>::patches(&self) -> vortex_array::patches::Patches + +pub fn vortex_array::array::view::ArrayView<'_, vortex_sparse::Sparse>::resolved_patches(&self) -> vortex_error::VortexResult + pub type vortex_sparse::SparseArray = vortex_array::array::typed::Array diff --git a/encodings/sparse/src/canonical.rs b/encodings/sparse/src/canonical.rs index 2a794b96aaa..165b09df83c 100644 --- a/encodings/sparse/src/canonical.rs +++ b/encodings/sparse/src/canonical.rs @@ -57,6 +57,7 @@ use vortex_error::vortex_bail; use crate::ConstantArray; use crate::Sparse; use crate::SparseArray; +use crate::SparseExt as _; pub(super) fn execute_sparse( array: &SparseArray, ctx: &mut ExecutionCtx, @@ -84,7 +85,7 @@ pub(super) fn execute_sparse( struct_fields, array.fill_scalar().as_struct(), array.dtype().nullability(), - array.patches(), + &array.patches(), array.len(), ctx, )?, @@ -97,7 +98,7 @@ pub(super) fn execute_sparse( *decimal_dtype, *nullability, fill_value, - array.patches(), + &array.patches(), array.len(), ctx, )? diff --git a/encodings/sparse/src/compute/cast.rs b/encodings/sparse/src/compute/cast.rs index 663539a6030..35ce9474589 100644 --- a/encodings/sparse/src/compute/cast.rs +++ b/encodings/sparse/src/compute/cast.rs @@ -11,12 +11,12 @@ use vortex_array::scalar_fn::fns::cast::CastReduce; use vortex_error::VortexResult; use crate::Sparse; +use crate::SparseExt as _; impl CastReduce for Sparse { fn cast(array: ArrayView<'_, Self>, dtype: &DType) -> VortexResult> { let casted_patches = array .patches() - .clone() .map_values(|values| values.cast(dtype.clone()))?; let casted_fill = if array.patches().num_patches() == array.len() { diff --git a/encodings/sparse/src/compute/filter.rs b/encodings/sparse/src/compute/filter.rs index 2cc8064a6cd..f7871f124a6 100644 --- a/encodings/sparse/src/compute/filter.rs +++ b/encodings/sparse/src/compute/filter.rs @@ -11,6 +11,7 @@ use vortex_mask::Mask; use crate::ConstantArray; use crate::Sparse; +use crate::SparseExt as _; impl FilterKernel for Sparse { fn filter( array: ArrayView<'_, Self>, diff --git a/encodings/sparse/src/compute/take.rs b/encodings/sparse/src/compute/take.rs index b0b5f83cad0..ddf9000fac1 100644 --- a/encodings/sparse/src/compute/take.rs +++ b/encodings/sparse/src/compute/take.rs @@ -10,6 +10,7 @@ use vortex_error::VortexResult; use crate::ConstantArray; use crate::Sparse; +use crate::SparseExt as _; impl TakeExecute for Sparse { fn take( array: ArrayView<'_, Self>, diff --git a/encodings/sparse/src/lib.rs b/encodings/sparse/src/lib.rs index 411bc9c7543..3903a7b3274 100644 --- a/encodings/sparse/src/lib.rs +++ b/encodings/sparse/src/lib.rs @@ -30,6 +30,7 @@ use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::patches::Patches; +use vortex_array::patches::PatchesData; use vortex_array::patches::PatchesMetadata; use vortex_array::scalar::Scalar; use vortex_array::scalar::ScalarValue; @@ -72,15 +73,18 @@ pub struct SparseMetadata { } impl ArrayHash for SparseData { - fn array_hash(&self, state: &mut H, precision: Precision) { - self.patches.array_hash(state, precision); + fn array_hash(&self, state: &mut H, _precision: Precision) { + self.array_len.hash(state); + self.patches_data.hash(state); self.fill_value.hash(state); } } impl ArrayEq for SparseData { - fn array_eq(&self, other: &Self, precision: Precision) -> bool { - self.patches.array_eq(&other.patches, precision) && self.fill_value == other.fill_value + fn array_eq(&self, other: &Self, _precision: Precision) -> bool { + self.array_len == other.array_len + && self.patches_data == other.patches_data + && self.fill_value == other.fill_value } } @@ -100,9 +104,10 @@ impl VTable for Sparse { data: &Self::ArrayData, dtype: &DType, len: usize, - _slots: &[Option], + slots: &[Option], ) -> VortexResult<()> { - SparseData::validate(data.patches(), data.fill_scalar(), dtype, len) + let patches = SparseData::patches_from_slots(data, len, slots); + SparseData::validate(&patches, data.fill_scalar(), dtype, len) } fn nbuffers(_array: ArrayView<'_, Self>) -> usize { @@ -181,7 +186,7 @@ impl VTable for Sparse { None, )?; let slots = SparseData::make_slots(&patches); - let data = SparseData::try_new_from_patches(patches, fill_value)?; + let data = SparseData::from_patches(&patches, fill_value)?; Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) } @@ -217,7 +222,10 @@ pub(crate) const SLOT_NAMES: [&str; NUM_SLOTS] = #[derive(Clone, Debug)] pub struct SparseData { - patches: Patches, + /// The total length of the sparse array. + array_len: usize, + /// Patch metadata (offset, offset_within_chunk) for reconstructing Patches from slots. + patches_data: PatchesData, fill_value: Scalar, } @@ -241,7 +249,7 @@ impl Sparse { let dtype = fill_value.dtype().clone(); let patches = Patches::new(len, 0, indices, values, None)?; let slots = SparseData::make_slots(&patches); - let data = SparseData::try_new_from_patches(patches, fill_value)?; + let data = SparseData::from_patches(&patches, fill_value)?; Ok(unsafe { Array::from_parts_unchecked(ArrayParts::new(Sparse, dtype, len, data).with_slots(slots)) }) @@ -251,7 +259,7 @@ impl Sparse { let dtype = fill_value.dtype().clone(); let len = patches.array_len(); let slots = SparseData::make_slots(&patches); - let data = SparseData::try_new_from_patches(patches, fill_value)?; + let data = SparseData::from_patches(&patches, fill_value)?; Ok(unsafe { Array::from_parts_unchecked(ArrayParts::new(Sparse, dtype, len, data).with_slots(slots)) }) @@ -261,7 +269,7 @@ impl Sparse { let dtype = fill_value.dtype().clone(); let len = patches.array_len(); let slots = SparseData::make_slots(&patches); - let data = unsafe { SparseData::new_unchecked(patches, fill_value) }; + let data = SparseData::from_patches_unchecked(&patches, fill_value); unsafe { Array::from_parts_unchecked(ArrayParts::new(Sparse, dtype, len, data).with_slots(slots)) } @@ -325,25 +333,41 @@ impl SparseData { } fn make_slots(patches: &Patches) -> Vec> { - vec![ - Some(patches.indices().clone()), - Some(patches.values().clone()), - patches.chunk_offsets().clone(), - ] + let (indices, values, chunk_offsets) = PatchesData::make_slots(patches); + vec![Some(indices), Some(values), chunk_offsets] } - /// Build a new SparseArray from an existing set of patches. + /// Reconstruct a [`Patches`] from the stored metadata and the array's slots. + fn patches_from_slots(data: &SparseData, len: usize, slots: &[Option]) -> Patches { + data.patches_data.to_patches( + len, + slots[0] + .clone() + .vortex_expect("SparseArray patch_indices slot"), + slots[1] + .clone() + .vortex_expect("SparseArray patch_values slot"), + slots[2].clone(), + ) + } + + /// Build a new SparseData from an existing set of patches, normalizing dtypes. pub fn try_new_from_patches(patches: Patches, fill_value: Scalar) -> VortexResult { let patches = Self::normalize_patches_dtype(patches, &fill_value)?; - Ok(Self { - patches, - fill_value, - }) + Ok(Self::from_patches_unchecked(&patches, fill_value)) } - pub(crate) unsafe fn new_unchecked(patches: Patches, fill_value: Scalar) -> Self { + /// Extract metadata from patches to create SparseData, with dtype normalization. + fn from_patches(patches: &Patches, fill_value: Scalar) -> VortexResult { + let patches = Self::normalize_patches_dtype(patches.clone(), &fill_value)?; + Ok(Self::from_patches_unchecked(&patches, fill_value)) + } + + /// Extract metadata from patches to create SparseData, without validation. + fn from_patches_unchecked(patches: &Patches, fill_value: Scalar) -> Self { Self { - patches, + array_len: patches.array_len(), + patches_data: PatchesData::from_patches(patches), fill_value, } } @@ -351,13 +375,13 @@ impl SparseData { /// Returns the length of the array. #[inline] pub fn len(&self) -> usize { - self.patches.array_len() + self.array_len } /// Returns whether the array is empty. #[inline] pub fn is_empty(&self) -> bool { - self.patches.array_len() == 0 + self.array_len == 0 } /// Returns the logical data type of the array. @@ -366,28 +390,10 @@ impl SparseData { self.fill_scalar().dtype() } + /// Returns the offset of the patches within the parent array. #[inline] - pub fn patches(&self) -> &Patches { - &self.patches - } - - #[inline] - pub fn resolved_patches(&self) -> VortexResult { - let patches = self.patches(); - let indices_offset = Scalar::from(patches.offset()).cast(patches.indices().dtype())?; - let indices = patches.indices().binary( - ConstantArray::new(indices_offset, patches.indices().len()).into_array(), - Operator::Sub, - )?; - - Patches::new( - patches.array_len(), - 0, - indices, - patches.values().clone(), - // TODO(0ax1): handle chunk offsets - None, - ) + pub fn offset(&self) -> usize { + self.patches_data.offset() } #[inline] @@ -494,25 +500,64 @@ impl SparseData { } } +/// Extension trait for accessing patches on [`SparseArray`] and [`ArrayView<'_, Sparse>`]. +/// +/// Patches are reconstructed from the array's slots and stored metadata on each call. +pub trait SparseExt { + /// Reconstruct patches from the array's slots and metadata. + fn patches(&self) -> Patches; + + /// Return patches with offset-resolved indices (offset subtracted from each index). + fn resolved_patches(&self) -> VortexResult { + let patches = self.patches(); + let indices_offset = Scalar::from(patches.offset()).cast(patches.indices().dtype())?; + let indices = patches.indices().binary( + ConstantArray::new(indices_offset, patches.indices().len()).into_array(), + Operator::Sub, + )?; + + Patches::new( + patches.array_len(), + 0, + indices, + patches.values().clone(), + // TODO(0ax1): handle chunk offsets + None, + ) + } +} + +impl SparseExt for ArrayView<'_, Sparse> { + fn patches(&self) -> Patches { + SparseData::patches_from_slots(self.data(), self.len(), self.slots()) + } +} + +impl SparseExt for Array { + fn patches(&self) -> Patches { + SparseData::patches_from_slots(self.data(), self.as_array().len(), self.slots()) + } +} + impl ValidityVTable for Sparse { fn validity(array: ArrayView<'_, Sparse>) -> VortexResult { - let patches = unsafe { + let orig_patches = array.patches(); + let validity_patches = unsafe { Patches::new_unchecked( - array.patches.array_len(), - array.patches.offset(), - array.patches.indices().clone(), - array - .patches + orig_patches.array_len(), + orig_patches.offset(), + orig_patches.indices().clone(), + orig_patches .values() .validity()? - .to_array(array.patches.values().len()), - array.patches.chunk_offsets().clone(), - array.patches.offset_within_chunk(), + .to_array(orig_patches.values().len()), + orig_patches.chunk_offsets().clone(), + orig_patches.offset_within_chunk(), ) }; Ok(Validity::Array( - unsafe { Sparse::new_unchecked(patches, array.fill_value.is_valid().into()) } + unsafe { Sparse::new_unchecked(validity_patches, array.fill_value.is_valid().into()) } .into_array(), )) } diff --git a/encodings/sparse/src/ops.rs b/encodings/sparse/src/ops.rs index cfa89c0a733..172114b7dfb 100644 --- a/encodings/sparse/src/ops.rs +++ b/encodings/sparse/src/ops.rs @@ -8,6 +8,7 @@ use vortex_array::vtable::OperationsVTable; use vortex_error::VortexResult; use crate::Sparse; +use crate::SparseExt as _; impl OperationsVTable for Sparse { fn scalar_at( diff --git a/encodings/sparse/src/rules.rs b/encodings/sparse/src/rules.rs index c53e7e4c6a5..d888d64a389 100644 --- a/encodings/sparse/src/rules.rs +++ b/encodings/sparse/src/rules.rs @@ -12,6 +12,7 @@ use vortex_array::scalar_fn::fns::not::NotReduceAdaptor; use vortex_error::VortexResult; use crate::Sparse; +use crate::SparseExt as _; pub(crate) static RULES: ParentRuleSet = ParentRuleSet::new(&[ ParentRuleSet::lift(&CastReduceAdaptor(Sparse)), @@ -21,7 +22,7 @@ pub(crate) static RULES: ParentRuleSet = ParentRuleSet::new(&[ impl NotReduce for Sparse { fn invert(array: ArrayView<'_, Self>) -> VortexResult> { let inverted_fill = array.fill_scalar().as_bool().invert().into_scalar(); - let inverted_patches = array.patches().clone().map_values(|values| values.not())?; + let inverted_patches = array.patches().map_values(|values| values.not())?; Ok(Some( Sparse::try_new_from_patches(inverted_patches, inverted_fill)?.into_array(), )) diff --git a/encodings/sparse/src/slice.rs b/encodings/sparse/src/slice.rs index 7a36d58e209..02d48b91d80 100644 --- a/encodings/sparse/src/slice.rs +++ b/encodings/sparse/src/slice.rs @@ -12,6 +12,7 @@ use vortex_error::VortexResult; use crate::ConstantArray; use crate::Sparse; +use crate::SparseExt as _; impl SliceKernel for Sparse { fn slice( diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 4a5316b0bd8..0f859cb44ed 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -13840,6 +13840,44 @@ impl vortex_array::ArrayHash for vortex_array::patches::Patches pub fn vortex_array::patches::Patches::array_hash(&self, &mut H, vortex_array::Precision) +pub struct vortex_array::patches::PatchesData + +impl vortex_array::patches::PatchesData + +pub fn vortex_array::patches::PatchesData::from_patches(&vortex_array::patches::Patches) -> Self + +pub fn vortex_array::patches::PatchesData::make_optional_slots(core::option::Option<&vortex_array::patches::Patches>) -> (core::option::Option, core::option::Option, core::option::Option) + +pub fn vortex_array::patches::PatchesData::make_slots(&vortex_array::patches::Patches) -> (vortex_array::ArrayRef, vortex_array::ArrayRef, core::option::Option) + +pub fn vortex_array::patches::PatchesData::offset(&self) -> usize + +pub fn vortex_array::patches::PatchesData::offset_within_chunk(&self) -> core::option::Option + +pub fn vortex_array::patches::PatchesData::to_optional_patches(core::option::Option<&Self>, usize, core::option::Option<&vortex_array::ArrayRef>, core::option::Option<&vortex_array::ArrayRef>, core::option::Option<&vortex_array::ArrayRef>) -> core::option::Option + +pub fn vortex_array::patches::PatchesData::to_patches(&self, usize, vortex_array::ArrayRef, vortex_array::ArrayRef, core::option::Option) -> vortex_array::patches::Patches + +impl core::clone::Clone for vortex_array::patches::PatchesData + +pub fn vortex_array::patches::PatchesData::clone(&self) -> vortex_array::patches::PatchesData + +impl core::cmp::Eq for vortex_array::patches::PatchesData + +impl core::cmp::PartialEq for vortex_array::patches::PatchesData + +pub fn vortex_array::patches::PatchesData::eq(&self, &vortex_array::patches::PatchesData) -> bool + +impl core::fmt::Debug for vortex_array::patches::PatchesData + +pub fn vortex_array::patches::PatchesData::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_array::patches::PatchesData + +pub fn vortex_array::patches::PatchesData::hash<__H: core::hash::Hasher>(&self, &mut __H) + +impl core::marker::StructuralPartialEq for vortex_array::patches::PatchesData + pub struct vortex_array::patches::PatchesMetadata impl vortex_array::patches::PatchesMetadata diff --git a/vortex-array/src/patches.rs b/vortex-array/src/patches.rs index fa532546b50..7ce88aeac84 100644 --- a/vortex-array/src/patches.rs +++ b/vortex-array/src/patches.rs @@ -122,6 +122,109 @@ impl PatchesMetadata { } } +/// Metadata stored in an array's data struct for reconstructing [`Patches`] from slots. +/// +/// The actual patch arrays (indices, values, chunk_offsets) live in the array's +/// slots. This struct stores only the scalar metadata needed to reassemble them. +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub struct PatchesData { + offset: usize, + offset_within_chunk: Option, +} + +impl PatchesData { + /// Extract patch metadata from an existing [`Patches`]. + pub fn from_patches(patches: &Patches) -> Self { + Self { + offset: patches.offset(), + offset_within_chunk: patches.offset_within_chunk(), + } + } + + /// Reconstruct a [`Patches`] from this metadata and the slot arrays. + /// + /// # Safety + /// Caller must ensure the slot arrays satisfy the `Patches` invariants. + pub fn to_patches( + &self, + len: usize, + indices: ArrayRef, + values: ArrayRef, + chunk_offsets: Option, + ) -> Patches { + unsafe { + Patches::new_unchecked( + len, + self.offset, + indices, + values, + chunk_offsets, + self.offset_within_chunk, + ) + } + } + + /// Reconstruct an optional [`Patches`] from optional metadata and slot arrays. + /// + /// Returns `None` if `patches_data` is `None` or if indices/values slots are missing. + pub fn to_optional_patches( + patches_data: Option<&Self>, + len: usize, + indices: Option<&ArrayRef>, + values: Option<&ArrayRef>, + chunk_offsets: Option<&ArrayRef>, + ) -> Option { + let data = patches_data?; + match (indices, values) { + (Some(indices), Some(values)) => Some(unsafe { + Patches::new_unchecked( + len, + data.offset, + indices.clone(), + values.clone(), + chunk_offsets.cloned(), + data.offset_within_chunk, + ) + }), + _ => None, + } + } + + /// Extract the slot arrays (indices, values, chunk_offsets) from a [`Patches`]. + pub fn make_slots(patches: &Patches) -> (ArrayRef, ArrayRef, Option) { + ( + patches.indices().clone(), + patches.values().clone(), + patches.chunk_offsets().clone(), + ) + } + + /// Extract optional slot arrays from an optional [`Patches`]. + pub fn make_optional_slots( + patches: Option<&Patches>, + ) -> (Option, Option, Option) { + match patches { + Some(p) => { + let (indices, values, chunk_offsets) = Self::make_slots(p); + (Some(indices), Some(values), chunk_offsets) + } + None => (None, None, None), + } + } + + /// Returns the patch offset. + #[inline] + pub fn offset(&self) -> usize { + self.offset + } + + /// Returns the offset within the first chunk, if chunk offsets are present. + #[inline] + pub fn offset_within_chunk(&self) -> Option { + self.offset_within_chunk + } +} + /// A helper for working with patched arrays. #[derive(Debug, Clone)] pub struct Patches { diff --git a/vortex-btrblocks/src/schemes/float.rs b/vortex-btrblocks/src/schemes/float.rs index 8c55b38d345..d1fe7ec4f6e 100644 --- a/vortex-btrblocks/src/schemes/float.rs +++ b/vortex-btrblocks/src/schemes/float.rs @@ -27,6 +27,7 @@ use vortex_compressor::scheme::DescendantExclusion; use vortex_error::VortexResult; use vortex_error::vortex_panic; use vortex_sparse::Sparse; +use vortex_sparse::SparseExt as _; use super::integer::SparseScheme as IntSparseScheme; use crate::ArrayAndStats; diff --git a/vortex-btrblocks/src/schemes/integer.rs b/vortex-btrblocks/src/schemes/integer.rs index dfd61deb80b..87b4ac85ad6 100644 --- a/vortex-btrblocks/src/schemes/integer.rs +++ b/vortex-btrblocks/src/schemes/integer.rs @@ -42,6 +42,7 @@ use vortex_runend::RunEnd; use vortex_runend::compress::runend_encode; use vortex_sequence::sequence_encode; use vortex_sparse::Sparse; +use vortex_sparse::SparseExt as _; use vortex_zigzag::ZigZag; use vortex_zigzag::ZigZagArrayExt; use vortex_zigzag::zigzag_encode; diff --git a/vortex-btrblocks/src/schemes/string.rs b/vortex-btrblocks/src/schemes/string.rs index 0df5a268157..ade42f88668 100644 --- a/vortex-btrblocks/src/schemes/string.rs +++ b/vortex-btrblocks/src/schemes/string.rs @@ -22,6 +22,7 @@ use vortex_fsst::FSSTArrayExt; use vortex_fsst::fsst_compress; use vortex_fsst::fsst_train_compressor; use vortex_sparse::Sparse; +use vortex_sparse::SparseExt as _; use super::integer::IntDictScheme; use super::integer::SparseScheme as IntSparseScheme; From 1ce40fa5b9379c1a4ae63d54d7f70e8d9f6820f7 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 6 May 2026 17:28:18 +0100 Subject: [PATCH 2/4] fix[patches]: ensure cached patches are updated and use `PatchesData` Signed-off-by: Joe Isaacs --- encodings/alp/src/alp/array.rs | 39 +++---- encodings/alp/src/alp_rd/array.rs | 25 ++--- .../fastlanes/src/bitpacking/array/mod.rs | 14 ++- .../fastlanes/src/bitpacking/vtable/mod.rs | 28 +++-- encodings/sparse/src/lib.rs | 23 ++-- vortex-array/public-api.lock | 28 +++-- vortex-array/src/patches.rs | 102 ++++++++---------- 7 files changed, 128 insertions(+), 131 deletions(-) diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index d0a1804863f..41e02712aa0 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -25,6 +25,7 @@ use vortex_array::arrays::Primitive; use vortex_array::buffer::BufferHandle; use vortex_array::dtype::DType; use vortex_array::dtype::PType; +use vortex_array::patches::PatchSlotIndices; use vortex_array::patches::Patches; use vortex_array::patches::PatchesData; use vortex_array::patches::PatchesMetadata; @@ -82,15 +83,10 @@ impl VTable for ALP { len: usize, slots: &[Option], ) -> VortexResult<()> { - let slots = ALPSlotsView::from_slots(slots); - let patches = PatchesData::to_optional_patches( - data.patches_data.as_ref(), - len, - slots.patch_indices, - slots.patch_values, - slots.patch_chunk_offsets, - ); - validate_parts(dtype, len, data.exponents, slots.encoded, patches) + let alp_slots = ALPSlotsView::from_slots(slots); + let patches = + PatchesData::patches_from_slots(data.patches_data.as_ref(), len, slots, PATCH_SLOTS); + validate_parts(dtype, len, data.exponents, alp_slots.encoded, patches) } fn nbuffers(_array: ArrayView<'_, Self>) -> usize { @@ -213,6 +209,12 @@ pub struct ALPSlots { pub patch_chunk_offsets: Option, } +const PATCH_SLOTS: PatchSlotIndices = PatchSlotIndices { + indices: ALPSlots::PATCH_INDICES, + values: ALPSlots::PATCH_VALUES, + chunk_offsets: ALPSlots::PATCH_CHUNK_OFFSETS, +}; + #[derive(Clone, Debug)] pub struct ALPData { patches_data: Option, @@ -398,14 +400,9 @@ impl ALP { impl ALPData { fn make_slots(encoded: &ArrayRef, patches: Option<&Patches>) -> Vec> { - let (patch_indices, patch_values, patch_chunk_offsets) = - PatchesData::make_optional_slots(patches); - vec![ - Some(encoded.clone()), - patch_indices, - patch_values, - patch_chunk_offsets, - ] + let mut slots = vec![Some(encoded.clone())]; + PatchesData::push_slots(&mut slots, patches); + slots } #[inline] @@ -420,13 +417,11 @@ pub trait ALPArrayExt: ALPArraySlotsExt { } fn patches(&self) -> Option { - let slots = self.slots_view(); - PatchesData::to_optional_patches( + PatchesData::patches_from_slots( self.patches_data.as_ref(), self.as_ref().len(), - slots.patch_indices, - slots.patch_values, - slots.patch_chunk_offsets, + self.as_ref().slots(), + PATCH_SLOTS, ) } } diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs index dbd5d649bfd..950e95d153e 100644 --- a/encodings/alp/src/alp_rd/array.rs +++ b/encodings/alp/src/alp_rd/array.rs @@ -30,6 +30,7 @@ use vortex_array::buffer::BufferHandle; use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; +use vortex_array::patches::PatchSlotIndices; use vortex_array::patches::Patches; use vortex_array::patches::PatchesData; use vortex_array::patches::PatchesMetadata; @@ -320,6 +321,11 @@ pub(super) const LEFT_PARTS_SLOT: usize = 0; /// The right (least significant) parts of the real-double encoded values. pub(super) const RIGHT_PARTS_SLOT: usize = 1; /// The indices of left-parts exception values that could not be dictionary-encoded. +pub(super) const LP_PATCH_SLOTS: PatchSlotIndices = PatchSlotIndices { + indices: LP_PATCH_INDICES_SLOT, + values: LP_PATCH_VALUES_SLOT, + chunk_offsets: LP_PATCH_CHUNK_OFFSETS_SLOT, +}; pub(super) const LP_PATCH_INDICES_SLOT: usize = 2; /// The exception values for left-parts that could not be dictionary-encoded. pub(super) const LP_PATCH_VALUES_SLOT: usize = 3; @@ -452,14 +458,9 @@ impl ALPRDData { right_parts: &ArrayRef, patches: Option<&Patches>, ) -> Vec> { - let (pi, pv, pco) = PatchesData::make_optional_slots(patches); - vec![ - Some(left_parts.clone()), - Some(right_parts.clone()), - pi, - pv, - pco, - ] + let mut slots = vec![Some(left_parts.clone()), Some(right_parts.clone())]; + PatchesData::push_slots(&mut slots, patches); + slots } /// Return all the owned parts of the array @@ -501,13 +502,7 @@ fn patches_from_slots( patches_data: Option<&PatchesData>, len: usize, ) -> Option { - PatchesData::to_optional_patches( - patches_data, - len, - slots[LP_PATCH_INDICES_SLOT].as_ref(), - slots[LP_PATCH_VALUES_SLOT].as_ref(), - slots[LP_PATCH_CHUNK_OFFSETS_SLOT].as_ref(), - ) + PatchesData::patches_from_slots(patches_data, len, slots, LP_PATCH_SLOTS) } fn validate_parts( diff --git a/encodings/fastlanes/src/bitpacking/array/mod.rs b/encodings/fastlanes/src/bitpacking/array/mod.rs index f28212842ee..e5c64252fbc 100644 --- a/encodings/fastlanes/src/bitpacking/array/mod.rs +++ b/encodings/fastlanes/src/bitpacking/array/mod.rs @@ -15,6 +15,7 @@ use vortex_array::buffer::BufferHandle; use vortex_array::dtype::DType; use vortex_array::dtype::NativePType; use vortex_array::dtype::PType; +use vortex_array::patches::PatchSlotIndices; use vortex_array::patches::Patches; use vortex_array::patches::PatchesData; use vortex_array::validity::Validity; @@ -44,6 +45,12 @@ pub struct BitPackedSlots { pub validity_child: Option, } +pub(crate) const PATCH_SLOTS: PatchSlotIndices = PatchSlotIndices { + indices: BitPackedSlots::PATCH_INDICES, + values: BitPackedSlots::PATCH_VALUES, + chunk_offsets: BitPackedSlots::PATCH_CHUNK_OFFSETS, +}; + pub struct BitPackedDataParts { pub offset: u16, pub bit_width: u8, @@ -286,12 +293,11 @@ pub trait BitPackedArrayExt: BitPackedArraySlotsExt { #[inline] fn patches(&self) -> Option { - PatchesData::to_optional_patches( + PatchesData::patches_from_slots( self.patches_data.as_ref(), self.as_ref().len(), - self.patch_indices(), - self.patch_values(), - self.patch_chunk_offsets(), + self.as_ref().slots(), + PATCH_SLOTS, ) } diff --git a/encodings/fastlanes/src/bitpacking/vtable/mod.rs b/encodings/fastlanes/src/bitpacking/vtable/mod.rs index 64489f22401..5a3bae72cb5 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/mod.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/mod.rs @@ -46,6 +46,7 @@ use crate::bitpack_decompress::unpack_array; use crate::bitpack_decompress::unpack_into_primitive_builder; use crate::bitpacking::array::BitPackedSlots; use crate::bitpacking::array::BitPackedSlotsView; +use crate::bitpacking::array::PATCH_SLOTS; use crate::bitpacking::vtable::kernels::PARENT_KERNELS; use crate::bitpacking::vtable::rules::RULES; mod kernels; @@ -102,16 +103,11 @@ impl VTable for BitPacked { len: usize, slots: &[Option], ) -> VortexResult<()> { - let slots = BitPackedSlotsView::from_slots(slots); + let bp_slots = BitPackedSlotsView::from_slots(slots); - let validity = child_to_validity(slots.validity_child, dtype.nullability()); - let patches = PatchesData::to_optional_patches( - data.patches_data.as_ref(), - len, - slots.patch_indices, - slots.patch_values, - slots.patch_chunk_offsets, - ); + let validity = child_to_validity(bp_slots.validity_child, dtype.nullability()); + let patches = + PatchesData::patches_from_slots(data.patches_data.as_ref(), len, slots, PATCH_SLOTS); BitPackedData::validate( &data.packed, dtype.as_ptype(), @@ -212,9 +208,10 @@ impl VTable for BitPacked { .transpose()?; let slots = { - let (pi, pv, pco) = PatchesData::make_optional_slots(patches.as_ref()); - let validity_slot = validity_to_child(&validity, len); - vec![pi, pv, pco, validity_slot] + let mut s = Vec::with_capacity(4); + PatchesData::push_slots(&mut s, patches.as_ref()); + s.push(validity_to_child(&validity, len)); + s }; let data = BitPackedData::try_new( packed, @@ -303,9 +300,10 @@ impl BitPacked { ) -> VortexResult { let dtype = DType::Primitive(ptype, validity.nullability()); let slots = { - let (pi, pv, pco) = PatchesData::make_optional_slots(patches.as_ref()); - let validity_slot = validity_to_child(&validity, len); - vec![pi, pv, pco, validity_slot] + let mut s = Vec::with_capacity(4); + PatchesData::push_slots(&mut s, patches.as_ref()); + s.push(validity_to_child(&validity, len)); + s }; let data = BitPackedData::try_new(packed, patches, bit_width, offset)?; Array::try_from_parts(ArrayParts::new(BitPacked, dtype, len, data).with_slots(slots)) diff --git a/encodings/sparse/src/lib.rs b/encodings/sparse/src/lib.rs index 3903a7b3274..16c0ac217a3 100644 --- a/encodings/sparse/src/lib.rs +++ b/encodings/sparse/src/lib.rs @@ -29,6 +29,7 @@ use vortex_array::buffer::BufferHandle; use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; +use vortex_array::patches::PatchSlotIndices; use vortex_array::patches::Patches; use vortex_array::patches::PatchesData; use vortex_array::patches::PatchesMetadata; @@ -219,6 +220,11 @@ impl VTable for Sparse { pub(crate) const NUM_SLOTS: usize = 3; pub(crate) const SLOT_NAMES: [&str; NUM_SLOTS] = ["patch_indices", "patch_values", "patch_chunk_offsets"]; +const PATCH_SLOTS: PatchSlotIndices = PatchSlotIndices { + indices: 0, + values: 1, + chunk_offsets: 2, +}; #[derive(Clone, Debug)] pub struct SparseData { @@ -333,22 +339,15 @@ impl SparseData { } fn make_slots(patches: &Patches) -> Vec> { - let (indices, values, chunk_offsets) = PatchesData::make_slots(patches); - vec![Some(indices), Some(values), chunk_offsets] + let mut slots = Vec::with_capacity(NUM_SLOTS); + PatchesData::push_slots(&mut slots, Some(patches)); + slots } /// Reconstruct a [`Patches`] from the stored metadata and the array's slots. fn patches_from_slots(data: &SparseData, len: usize, slots: &[Option]) -> Patches { - data.patches_data.to_patches( - len, - slots[0] - .clone() - .vortex_expect("SparseArray patch_indices slot"), - slots[1] - .clone() - .vortex_expect("SparseArray patch_values slot"), - slots[2].clone(), - ) + PatchesData::patches_from_slots(Some(&data.patches_data), len, slots, PATCH_SLOTS) + .vortex_expect("SparseArray patch slots must be present") } /// Build a new SparseData from an existing set of patches, normalizing dtypes. diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 0f859cb44ed..0555a3e32da 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -13760,6 +13760,24 @@ pub fn vortex_array::ArrayRef::optimize_recursive(&self, &vortex_session::Vortex pub mod vortex_array::patches +pub struct vortex_array::patches::PatchSlotIndices + +pub vortex_array::patches::PatchSlotIndices::chunk_offsets: usize + +pub vortex_array::patches::PatchSlotIndices::indices: usize + +pub vortex_array::patches::PatchSlotIndices::values: usize + +impl core::clone::Clone for vortex_array::patches::PatchSlotIndices + +pub fn vortex_array::patches::PatchSlotIndices::clone(&self) -> vortex_array::patches::PatchSlotIndices + +impl core::fmt::Debug for vortex_array::patches::PatchSlotIndices + +pub fn vortex_array::patches::PatchSlotIndices::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::Copy for vortex_array::patches::PatchSlotIndices + pub struct vortex_array::patches::Patches impl vortex_array::patches::Patches @@ -13844,19 +13862,17 @@ pub struct vortex_array::patches::PatchesData impl vortex_array::patches::PatchesData -pub fn vortex_array::patches::PatchesData::from_patches(&vortex_array::patches::Patches) -> Self - -pub fn vortex_array::patches::PatchesData::make_optional_slots(core::option::Option<&vortex_array::patches::Patches>) -> (core::option::Option, core::option::Option, core::option::Option) +pub const vortex_array::patches::PatchesData::NUM_SLOTS: usize -pub fn vortex_array::patches::PatchesData::make_slots(&vortex_array::patches::Patches) -> (vortex_array::ArrayRef, vortex_array::ArrayRef, core::option::Option) +pub fn vortex_array::patches::PatchesData::from_patches(&vortex_array::patches::Patches) -> Self pub fn vortex_array::patches::PatchesData::offset(&self) -> usize pub fn vortex_array::patches::PatchesData::offset_within_chunk(&self) -> core::option::Option -pub fn vortex_array::patches::PatchesData::to_optional_patches(core::option::Option<&Self>, usize, core::option::Option<&vortex_array::ArrayRef>, core::option::Option<&vortex_array::ArrayRef>, core::option::Option<&vortex_array::ArrayRef>) -> core::option::Option +pub fn vortex_array::patches::PatchesData::patches_from_slots(core::option::Option<&Self>, usize, &[core::option::Option], vortex_array::patches::PatchSlotIndices) -> core::option::Option -pub fn vortex_array::patches::PatchesData::to_patches(&self, usize, vortex_array::ArrayRef, vortex_array::ArrayRef, core::option::Option) -> vortex_array::patches::Patches +pub fn vortex_array::patches::PatchesData::push_slots(&mut alloc::vec::Vec>, core::option::Option<&vortex_array::patches::Patches>) impl core::clone::Clone for vortex_array::patches::PatchesData diff --git a/vortex-array/src/patches.rs b/vortex-array/src/patches.rs index 7ce88aeac84..88baa48418f 100644 --- a/vortex-array/src/patches.rs +++ b/vortex-array/src/patches.rs @@ -10,6 +10,7 @@ use num_traits::NumCast; use vortex_buffer::BitBuffer; use vortex_buffer::BufferMut; use vortex_error::VortexError; +use vortex_error::VortexExpect as _; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_ensure; @@ -132,7 +133,18 @@ pub struct PatchesData { offset_within_chunk: Option, } +/// Slot indices for the three patch components within a slot array. +#[derive(Copy, Clone, Debug)] +pub struct PatchSlotIndices { + pub indices: usize, + pub values: usize, + pub chunk_offsets: usize, +} + impl PatchesData { + /// The number of slots used by patches (indices, values, chunk_offsets). + pub const NUM_SLOTS: usize = 3; + /// Extract patch metadata from an existing [`Patches`]. pub fn from_patches(patches: &Patches) -> Self { Self { @@ -141,74 +153,50 @@ impl PatchesData { } } - /// Reconstruct a [`Patches`] from this metadata and the slot arrays. + /// Reconstruct patches from the given slot positions. /// - /// # Safety - /// Caller must ensure the slot arrays satisfy the `Patches` invariants. - pub fn to_patches( - &self, + /// Returns `None` if `patches_data` is `None`. + /// Panics if `patches_data` is `Some` but the indices or values slots are missing. + pub fn patches_from_slots( + patches_data: Option<&Self>, len: usize, - indices: ArrayRef, - values: ArrayRef, - chunk_offsets: Option, - ) -> Patches { - unsafe { + slots: &[Option], + slot_idx: PatchSlotIndices, + ) -> Option { + let data = patches_data?; + let indices = slots[slot_idx.indices] + .as_ref() + .vortex_expect("patches_data is set but patch_indices slot is missing"); + let values = slots[slot_idx.values] + .as_ref() + .vortex_expect("patches_data is set but patch_values slot is missing"); + Some(unsafe { Patches::new_unchecked( len, - self.offset, - indices, - values, - chunk_offsets, - self.offset_within_chunk, + data.offset, + indices.clone(), + values.clone(), + slots[slot_idx.chunk_offsets].clone(), + data.offset_within_chunk, ) - } + }) } - /// Reconstruct an optional [`Patches`] from optional metadata and slot arrays. + /// Push 3 patch slots (indices, values, chunk_offsets) onto a slot vector. /// - /// Returns `None` if `patches_data` is `None` or if indices/values slots are missing. - pub fn to_optional_patches( - patches_data: Option<&Self>, - len: usize, - indices: Option<&ArrayRef>, - values: Option<&ArrayRef>, - chunk_offsets: Option<&ArrayRef>, - ) -> Option { - let data = patches_data?; - match (indices, values) { - (Some(indices), Some(values)) => Some(unsafe { - Patches::new_unchecked( - len, - data.offset, - indices.clone(), - values.clone(), - chunk_offsets.cloned(), - data.offset_within_chunk, - ) - }), - _ => None, - } - } - - /// Extract the slot arrays (indices, values, chunk_offsets) from a [`Patches`]. - pub fn make_slots(patches: &Patches) -> (ArrayRef, ArrayRef, Option) { - ( - patches.indices().clone(), - patches.values().clone(), - patches.chunk_offsets().clone(), - ) - } - - /// Extract optional slot arrays from an optional [`Patches`]. - pub fn make_optional_slots( - patches: Option<&Patches>, - ) -> (Option, Option, Option) { + /// If `patches` is `None`, pushes three `None` entries. + pub fn push_slots(slots: &mut Vec>, patches: Option<&Patches>) { match patches { Some(p) => { - let (indices, values, chunk_offsets) = Self::make_slots(p); - (Some(indices), Some(values), chunk_offsets) + slots.push(Some(p.indices().clone())); + slots.push(Some(p.values().clone())); + slots.push(p.chunk_offsets().clone()); + } + None => { + slots.push(None); + slots.push(None); + slots.push(None); } - None => (None, None, None), } } From 49a88d3f38cd7cdeaeb4d90b427d2376780b6cd3 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 6 May 2026 17:49:01 +0100 Subject: [PATCH 3/4] fix[patches]: ensure cached patches are updated and use `PatchesData` Signed-off-by: Joe Isaacs --- vortex-array/src/patches.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/vortex-array/src/patches.rs b/vortex-array/src/patches.rs index 88baa48418f..f1fd0a32e9b 100644 --- a/vortex-array/src/patches.rs +++ b/vortex-array/src/patches.rs @@ -142,9 +142,6 @@ pub struct PatchSlotIndices { } impl PatchesData { - /// The number of slots used by patches (indices, values, chunk_offsets). - pub const NUM_SLOTS: usize = 3; - /// Extract patch metadata from an existing [`Patches`]. pub fn from_patches(patches: &Patches) -> Self { Self { From edd2c4a2e33cefed5920a7b223dee70ee90549bc Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 6 May 2026 17:53:46 +0100 Subject: [PATCH 4/4] fix[patches]: ensure cached patches are updated and use `PatchesData` Signed-off-by: Joe Isaacs --- vortex-array/public-api.lock | 2 -- 1 file changed, 2 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 0555a3e32da..ee99b6440ab 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -13862,8 +13862,6 @@ pub struct vortex_array::patches::PatchesData impl vortex_array::patches::PatchesData -pub const vortex_array::patches::PatchesData::NUM_SLOTS: usize - pub fn vortex_array::patches::PatchesData::from_patches(&vortex_array::patches::Patches) -> Self pub fn vortex_array::patches::PatchesData::offset(&self) -> usize