From e00f026c8533e51d426210589dcf6f295becd6bc Mon Sep 17 00:00:00 2001 From: Baris Palaska Date: Tue, 5 May 2026 14:09:25 +0100 Subject: [PATCH 1/4] add bench Signed-off-by: Baris Palaska --- vortex-array/Cargo.toml | 4 + vortex-array/benches/scalar_at_patches.rs | 122 ++++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 vortex-array/benches/scalar_at_patches.rs diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index f8676d76ef0..5489d22cfb9 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -124,6 +124,10 @@ harness = false name = "scalar_at_struct" harness = false +[[bench]] +name = "scalar_at_patches" +harness = false + [[bench]] name = "varbinview_compact" harness = false diff --git a/vortex-array/benches/scalar_at_patches.rs b/vortex-array/benches/scalar_at_patches.rs new file mode 100644 index 00000000000..16b88448804 --- /dev/null +++ b/vortex-array/benches/scalar_at_patches.rs @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +#![expect(clippy::unwrap_used)] +#![expect(clippy::cast_possible_truncation)] + +use divan::Bencher; +use rand::RngExt; +use rand::SeedableRng; +use rand::rngs::StdRng; +use vortex_array::IntoArray; +use vortex_array::patches::Patches; +use vortex_buffer::Buffer; + +fn main() { + divan::main(); +} + +const ARRAY_LEN: usize = 1_000_000; +const NUM_PATCHES: usize = 100; +const NUM_QUERIES: usize = 1_000; + +// Patch indices for `narrow_band_patches` are sampled from this window. +const PATCH_LOW: usize = 100_000; +const PATCH_HIGH: usize = 110_000; + +/// Build a `Patches` whose indices are sampled from `index_iter`. +/// +/// Indices are sorted and deduplicated; the values column is a dense +/// `i32` sequence and is incidental to the benchmarks (which target +/// index lookup, not value materialization). +fn patches_from_indices(index_iter: impl Iterator) -> Patches { + let mut indices: Vec = index_iter.collect(); + indices.sort(); + indices.dedup(); + let values: Buffer = (0..indices.len() as i32).collect(); + Patches::new( + ARRAY_LEN, + 0, + Buffer::from(indices).into_array(), + values.into_array(), + None, + ) + .unwrap() +} + +/// All patches clustered in `PATCH_LOW..PATCH_HIGH` — models a localized burst. +fn narrow_band_patches() -> Patches { + let mut rng = StdRng::seed_from_u64(42); + patches_from_indices( + (0..NUM_PATCHES).map(|_| rng.random_range((PATCH_LOW as u64)..(PATCH_HIGH as u64))), + ) +} + +/// Patches spread uniformly across the full array. +fn full_range_patches() -> Patches { + let mut rng = StdRng::seed_from_u64(43); + patches_from_indices((0..NUM_PATCHES).map(|_| rng.random_range(0..(ARRAY_LEN as u64)))) +} + +fn bench_search_index(bencher: Bencher, patches: Patches, queries: Vec) { + bencher + .with_inputs(|| (&patches, &queries)) + .bench_refs(|(patches, queries)| { + for &q in queries.iter() { + divan::black_box(patches.search_index(q).unwrap()); + } + }); +} + +#[divan::bench] +fn search_index_below_min(bencher: Bencher) { + let queries = (0..NUM_QUERIES).collect(); + bench_search_index(bencher, narrow_band_patches(), queries); +} + +#[divan::bench] +fn search_index_above_max(bencher: Bencher) { + let queries = (PATCH_HIGH..(PATCH_HIGH + NUM_QUERIES)).collect(); + bench_search_index(bencher, narrow_band_patches(), queries); +} + +#[divan::bench] +fn search_index_mixed_out_of_range(bencher: Bencher) { + let queries: Vec = (0..NUM_QUERIES / 2) + .map(|i| i * 100) + .chain((0..NUM_QUERIES / 2).map(|i| PATCH_HIGH + i * 50)) + .collect(); + bench_search_index(bencher, narrow_band_patches(), queries); +} + +#[divan::bench] +fn search_index_in_range(bencher: Bencher) { + let mut rng = StdRng::seed_from_u64(7); + let queries: Vec = (0..NUM_QUERIES) + .map(|_| rng.random_range(PATCH_LOW..PATCH_HIGH)) + .collect(); + bench_search_index(bencher, narrow_band_patches(), queries); +} + +#[divan::bench] +fn search_index_full_range_random(bencher: Bencher) { + let mut rng = StdRng::seed_from_u64(11); + let queries: Vec = (0..NUM_QUERIES) + .map(|_| rng.random_range(0..ARRAY_LEN)) + .collect(); + bench_search_index(bencher, full_range_patches(), queries); +} + +#[divan::bench] +fn get_patched_above_max(bencher: Bencher) { + let patches = narrow_band_patches(); + let queries: Vec = (PATCH_HIGH..(PATCH_HIGH + NUM_QUERIES)).collect(); + + bencher + .with_inputs(|| (&patches, &queries)) + .bench_refs(|(patches, queries)| { + for &q in queries.iter() { + divan::black_box(patches.get_patched(q).unwrap()); + } + }); +} From 24b61a47845e3409e0d3559579164daed7fe9682 Mon Sep 17 00:00:00 2001 From: Baris Palaska Date: Tue, 5 May 2026 14:12:54 +0100 Subject: [PATCH 2/4] rename Signed-off-by: Baris Palaska --- vortex-array/Cargo.toml | 2 +- .../benches/{scalar_at_patches.rs => patches_lookup.rs} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename vortex-array/benches/{scalar_at_patches.rs => patches_lookup.rs} (100%) diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index 5489d22cfb9..9701759a787 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -125,7 +125,7 @@ name = "scalar_at_struct" harness = false [[bench]] -name = "scalar_at_patches" +name = "patches_lookup" harness = false [[bench]] diff --git a/vortex-array/benches/scalar_at_patches.rs b/vortex-array/benches/patches_lookup.rs similarity index 100% rename from vortex-array/benches/scalar_at_patches.rs rename to vortex-array/benches/patches_lookup.rs From 3b076c45f536e8e5447f8957a6a17b7f0ba14242 Mon Sep 17 00:00:00 2001 From: Baris Palaska Date: Tue, 5 May 2026 14:35:03 +0100 Subject: [PATCH 3/4] rm get_patched bench Signed-off-by: Baris Palaska --- vortex-array/benches/patches_lookup.rs | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/vortex-array/benches/patches_lookup.rs b/vortex-array/benches/patches_lookup.rs index 16b88448804..e1bd5672596 100644 --- a/vortex-array/benches/patches_lookup.rs +++ b/vortex-array/benches/patches_lookup.rs @@ -20,15 +20,9 @@ const ARRAY_LEN: usize = 1_000_000; const NUM_PATCHES: usize = 100; const NUM_QUERIES: usize = 1_000; -// Patch indices for `narrow_band_patches` are sampled from this window. const PATCH_LOW: usize = 100_000; const PATCH_HIGH: usize = 110_000; -/// Build a `Patches` whose indices are sampled from `index_iter`. -/// -/// Indices are sorted and deduplicated; the values column is a dense -/// `i32` sequence and is incidental to the benchmarks (which target -/// index lookup, not value materialization). fn patches_from_indices(index_iter: impl Iterator) -> Patches { let mut indices: Vec = index_iter.collect(); indices.sort(); @@ -44,7 +38,6 @@ fn patches_from_indices(index_iter: impl Iterator) -> Patches { .unwrap() } -/// All patches clustered in `PATCH_LOW..PATCH_HIGH` — models a localized burst. fn narrow_band_patches() -> Patches { let mut rng = StdRng::seed_from_u64(42); patches_from_indices( @@ -52,7 +45,6 @@ fn narrow_band_patches() -> Patches { ) } -/// Patches spread uniformly across the full array. fn full_range_patches() -> Patches { let mut rng = StdRng::seed_from_u64(43); patches_from_indices((0..NUM_PATCHES).map(|_| rng.random_range(0..(ARRAY_LEN as u64)))) @@ -106,17 +98,3 @@ fn search_index_full_range_random(bencher: Bencher) { .collect(); bench_search_index(bencher, full_range_patches(), queries); } - -#[divan::bench] -fn get_patched_above_max(bencher: Bencher) { - let patches = narrow_band_patches(); - let queries: Vec = (PATCH_HIGH..(PATCH_HIGH + NUM_QUERIES)).collect(); - - bencher - .with_inputs(|| (&patches, &queries)) - .bench_refs(|(patches, queries)| { - for &q in queries.iter() { - divan::black_box(patches.get_patched(q).unwrap()); - } - }); -} From 0f8ff5510aaea99cc95fd34f732b612e80f0e4bd Mon Sep 17 00:00:00 2001 From: Baris Palaska Date: Tue, 5 May 2026 16:07:02 +0100 Subject: [PATCH 4/4] also bench with chunk offsets Signed-off-by: Baris Palaska --- vortex-array/benches/patches_lookup.rs | 118 +++++++++++++++++++------ 1 file changed, 92 insertions(+), 26 deletions(-) diff --git a/vortex-array/benches/patches_lookup.rs b/vortex-array/benches/patches_lookup.rs index e1bd5672596..cbf753e668f 100644 --- a/vortex-array/benches/patches_lookup.rs +++ b/vortex-array/benches/patches_lookup.rs @@ -9,6 +9,7 @@ use rand::RngExt; use rand::SeedableRng; use rand::rngs::StdRng; use vortex_array::IntoArray; +use vortex_array::patches::PATCH_CHUNK_SIZE; use vortex_array::patches::Patches; use vortex_buffer::Buffer; @@ -23,31 +24,77 @@ const NUM_QUERIES: usize = 1_000; const PATCH_LOW: usize = 100_000; const PATCH_HIGH: usize = 110_000; -fn patches_from_indices(index_iter: impl Iterator) -> Patches { +fn patches_from_indices(index_iter: impl Iterator, chunked: bool) -> Patches { let mut indices: Vec = index_iter.collect(); indices.sort(); indices.dedup(); let values: Buffer = (0..indices.len() as i32).collect(); - Patches::new( + + let chunk_offsets = chunked.then(|| { + let offsets: Vec = (0..ARRAY_LEN) + .step_by(PATCH_CHUNK_SIZE) + .map(|chunk_start| indices.partition_point(|&idx| (idx as usize) < chunk_start) as u64) + .collect(); + Buffer::from(offsets).into_array() + }); + + let patches = Patches::new( ARRAY_LEN, 0, Buffer::from(indices).into_array(), values.into_array(), - None, + chunk_offsets, ) - .unwrap() + .unwrap(); + if chunked { + assert!(patches.chunk_offsets().is_some()); + } + patches } -fn narrow_band_patches() -> Patches { +fn narrow_band_patches(chunked: bool) -> Patches { let mut rng = StdRng::seed_from_u64(42); patches_from_indices( (0..NUM_PATCHES).map(|_| rng.random_range((PATCH_LOW as u64)..(PATCH_HIGH as u64))), + chunked, ) } -fn full_range_patches() -> Patches { +fn full_range_patches(chunked: bool) -> Patches { let mut rng = StdRng::seed_from_u64(43); - patches_from_indices((0..NUM_PATCHES).map(|_| rng.random_range(0..(ARRAY_LEN as u64)))) + patches_from_indices( + (0..NUM_PATCHES).map(|_| rng.random_range(0..(ARRAY_LEN as u64))), + chunked, + ) +} + +fn queries_below_min() -> Vec { + (0..NUM_QUERIES).collect() +} + +fn queries_above_max() -> Vec { + (PATCH_HIGH..(PATCH_HIGH + NUM_QUERIES)).collect() +} + +fn queries_mixed_out_of_range() -> Vec { + (0..NUM_QUERIES / 2) + .map(|i| i * 100) + .chain((0..NUM_QUERIES / 2).map(|i| PATCH_HIGH + i * 50)) + .collect() +} + +fn queries_in_range() -> Vec { + let mut rng = StdRng::seed_from_u64(7); + (0..NUM_QUERIES) + .map(|_| rng.random_range(PATCH_LOW..PATCH_HIGH)) + .collect() +} + +fn queries_full_range() -> Vec { + let mut rng = StdRng::seed_from_u64(11); + (0..NUM_QUERIES) + .map(|_| rng.random_range(0..ARRAY_LEN)) + .collect() } fn bench_search_index(bencher: Bencher, patches: Patches, queries: Vec) { @@ -62,39 +109,58 @@ fn bench_search_index(bencher: Bencher, patches: Patches, queries: Vec) { #[divan::bench] fn search_index_below_min(bencher: Bencher) { - let queries = (0..NUM_QUERIES).collect(); - bench_search_index(bencher, narrow_band_patches(), queries); + bench_search_index(bencher, narrow_band_patches(false), queries_below_min()); +} + +#[divan::bench] +fn search_index_below_min_chunked(bencher: Bencher) { + bench_search_index(bencher, narrow_band_patches(true), queries_below_min()); } #[divan::bench] fn search_index_above_max(bencher: Bencher) { - let queries = (PATCH_HIGH..(PATCH_HIGH + NUM_QUERIES)).collect(); - bench_search_index(bencher, narrow_band_patches(), queries); + bench_search_index(bencher, narrow_band_patches(false), queries_above_max()); +} + +#[divan::bench] +fn search_index_above_max_chunked(bencher: Bencher) { + bench_search_index(bencher, narrow_band_patches(true), queries_above_max()); } #[divan::bench] fn search_index_mixed_out_of_range(bencher: Bencher) { - let queries: Vec = (0..NUM_QUERIES / 2) - .map(|i| i * 100) - .chain((0..NUM_QUERIES / 2).map(|i| PATCH_HIGH + i * 50)) - .collect(); - bench_search_index(bencher, narrow_band_patches(), queries); + bench_search_index( + bencher, + narrow_band_patches(false), + queries_mixed_out_of_range(), + ); +} + +#[divan::bench] +fn search_index_mixed_out_of_range_chunked(bencher: Bencher) { + bench_search_index( + bencher, + narrow_band_patches(true), + queries_mixed_out_of_range(), + ); } #[divan::bench] fn search_index_in_range(bencher: Bencher) { - let mut rng = StdRng::seed_from_u64(7); - let queries: Vec = (0..NUM_QUERIES) - .map(|_| rng.random_range(PATCH_LOW..PATCH_HIGH)) - .collect(); - bench_search_index(bencher, narrow_band_patches(), queries); + bench_search_index(bencher, narrow_band_patches(false), queries_in_range()); +} + +#[divan::bench] +fn search_index_in_range_chunked(bencher: Bencher) { + bench_search_index(bencher, narrow_band_patches(true), queries_in_range()); } #[divan::bench] fn search_index_full_range_random(bencher: Bencher) { - let mut rng = StdRng::seed_from_u64(11); - let queries: Vec = (0..NUM_QUERIES) - .map(|_| rng.random_range(0..ARRAY_LEN)) - .collect(); - bench_search_index(bencher, full_range_patches(), queries); + bench_search_index(bencher, full_range_patches(false), queries_full_range()); +} + +#[divan::bench] +fn search_index_full_range_random_chunked(bencher: Bencher) { + bench_search_index(bencher, full_range_patches(true), queries_full_range()); }