Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,10 @@ harness = false
name = "varbinview_zip"
harness = false

[[bench]]
name = "list_compute"
harness = false

[[bench]]
name = "take_primitive"
harness = false
Expand Down
115 changes: 115 additions & 0 deletions vortex-array/benches/list_compute.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#![allow(clippy::unwrap_used)]

use std::sync::Arc;

use divan::Bencher;
use vortex_array::ArrayRef;
use vortex_array::builders::{ArrayBuilder, ListBuilder};
use vortex_array::compute::is_sorted;
use vortex_array::compute::min_max;
use vortex_dtype::{DType, Nullability, PType};

/// Entry point of the benchmark binary; delegates to `divan::main`.
fn main() {
divan::main();
}

/// Number of lists (rows) appended to each benchmark array.
const ARRAY_SIZE: usize = 1_000;
/// Number of `i32` elements stored in every list.
const LIST_SIZE: usize = 10;

fn create_sorted_list_array() -> ArrayRef {
let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable));
let nullability = Nullability::NonNullable;
let mut builder = ListBuilder::<u32>::new(element_dtype.clone(), nullability);

for i in 0..ARRAY_SIZE {
let list_elements: Vec<i32> = (0..LIST_SIZE).map(|j| (i * LIST_SIZE + j) as i32).collect();
let list_scalar = list_elements
.into_iter()
.map(|x| vortex_scalar::Scalar::primitive(x, Nullability::NonNullable))
.collect::<Vec<_>>();
let list = vortex_scalar::Scalar::list(
element_dtype.clone(),
list_scalar,
Nullability::NonNullable,
);
builder.append_value(list.as_list()).unwrap();
}

builder.finish()
}

fn create_almost_sorted_list_array() -> ArrayRef {
// Create an array where the last two elements are swapped
// For simplicity, we'll create a new array with the swap
let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable));
let nullability = Nullability::NonNullable;
let mut builder = ListBuilder::<u32>::new(element_dtype.clone(), nullability);

for i in 0..ARRAY_SIZE {
let list_elements: Vec<i32> = if i == ARRAY_SIZE - 2 {
// Second to last: use last elements
((ARRAY_SIZE - 1) * LIST_SIZE..ARRAY_SIZE * LIST_SIZE)
.map(|x| x as i32)
.collect()
} else if i == ARRAY_SIZE - 1 {
// Last: use second to last elements
((ARRAY_SIZE - 2) * LIST_SIZE..(ARRAY_SIZE - 1) * LIST_SIZE)
.map(|x| x as i32)
.collect()
} else {
(i * LIST_SIZE..(i + 1) * LIST_SIZE).map(|x| x as i32).collect()
};

let list_scalar = list_elements
.into_iter()
.map(|x| vortex_scalar::Scalar::primitive(x, Nullability::NonNullable))
.collect::<Vec<_>>();
let list = vortex_scalar::Scalar::list(
element_dtype.clone(),
list_scalar,
Nullability::NonNullable,
);
builder.append_value(list.as_list()).unwrap();
}

builder.finish()
}

#[divan::bench]
fn is_sorted_list_sorted(bencher: Bencher) {
let arr = create_sorted_list_array();

bencher
.with_inputs(|| &arr)
.bench_refs(|arr| is_sorted(*arr).unwrap());
}

#[divan::bench]
fn is_sorted_list_almost_sorted(bencher: Bencher) {
let arr = create_almost_sorted_list_array();

bencher
.with_inputs(|| &arr)
.bench_refs(|arr| is_sorted(*arr).unwrap());
}

#[divan::bench]
fn min_max_list_sorted(bencher: Bencher) {
let arr = create_sorted_list_array();

bencher
.with_inputs(|| &arr)
.bench_refs(|arr| min_max(*arr).unwrap());
}

#[divan::bench]
fn min_max_list_almost_sorted(bencher: Bencher) {
let arr = create_almost_sorted_list_array();

bencher
.with_inputs(|| &arr)
.bench_refs(|arr| min_max(*arr).unwrap());
}
49 changes: 42 additions & 7 deletions vortex-array/src/arrays/fixed_size_list/compute/is_sorted.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,59 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::cmp::Ordering;

use vortex_error::VortexResult;
use vortex_scalar::ListScalar;

use crate::arrays::FixedSizeListArray;
use crate::arrays::FixedSizeListVTable;
use crate::compute::IsSortedKernel;
use crate::compute::IsSortedKernelAdapter;
use crate::register_kernel;

/// IsSorted implementation for [`FixedSizeListArray`].
/// Implementation of IsSortedKernel for FixedSizeListArray.
///
/// This implementation uses lexicographic comparison of list elements.
/// Since all lists have the same fixed size, comparison is straightforward element-wise.
/// Null lists are considered the smallest values.
/// Non-comparable lists (which shouldn't occur for lists with the same element type)
/// are treated as making the array not sorted.
impl IsSortedKernel for FixedSizeListVTable {
fn is_sorted(&self, _array: &FixedSizeListArray) -> VortexResult<Option<bool>> {
// This would require comparing lists lexicographically.
Ok(None)
fn is_sorted(&self, array: &FixedSizeListArray) -> VortexResult<Option<bool>> {
if array.len() <= 1 {
return Ok(Some(true));
}
for i in 0..array.len() - 1 {
let scalar_a = array.scalar_at(i)?;
let scalar_b = array.scalar_at(i + 1)?;
let a = ListScalar::try_from(&scalar_a)?;
let b = ListScalar::try_from(&scalar_b)?;
// For is_sorted, we allow Less and Equal, but not Greater or incomparable (None)
match a.partial_cmp(&b) {
Some(Ordering::Greater) | None => return Ok(Some(false)),
_ => {}
}
}
Ok(Some(true))
}

fn is_strict_sorted(&self, _array: &FixedSizeListArray) -> VortexResult<Option<bool>> {
// This would require comparing lists lexicographically without duplicates.
Ok(None)
fn is_strict_sorted(&self, array: &FixedSizeListArray) -> VortexResult<Option<bool>> {
if array.len() <= 1 {
return Ok(Some(true));
}
for i in 0..array.len() - 1 {
let scalar_a = array.scalar_at(i)?;
let scalar_b = array.scalar_at(i + 1)?;
let a = ListScalar::try_from(&scalar_a)?;
let b = ListScalar::try_from(&scalar_b)?;
// For is_strict_sorted, we only allow Less, not Equal, Greater, or incomparable (None)
match a.partial_cmp(&b) {
Some(Ordering::Greater | Ordering::Equal) | None => return Ok(Some(false)),
_ => {}
}
}
Ok(Some(true))
}
}

Expand Down
36 changes: 33 additions & 3 deletions vortex-array/src/arrays/fixed_size_list/compute/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_error::VortexResult;
use vortex_scalar::ListScalar;
use vortex_scalar::Scalar;

use crate::arrays::FixedSizeListArray;
use crate::arrays::FixedSizeListVTable;
Expand All @@ -12,9 +14,37 @@ use crate::register_kernel;

/// MinMax implementation for [`FixedSizeListArray`].
impl MinMaxKernel for FixedSizeListVTable {
fn min_max(&self, _array: &FixedSizeListArray) -> VortexResult<Option<MinMaxResult>> {
// This would require finding the lexicographically minimum and maximum lists.
Ok(None)
fn min_max(&self, array: &FixedSizeListArray) -> VortexResult<Option<MinMaxResult>> {
let mut min: Option<Scalar> = None;
let mut max: Option<Scalar> = None;
for i in 0..array.len() {
let scalar = array.scalar_at(i)?;
if scalar.is_null() {
continue;
}
let list_scalar = ListScalar::try_from(&scalar)?;
if let Some(current_min) = &min {
let current_min_list = ListScalar::try_from(current_min)?;
if list_scalar < current_min_list {
min = Some(scalar.clone());
}
} else {
min = Some(scalar.clone());
}
if let Some(current_max) = &max {
let current_max_list = ListScalar::try_from(current_max)?;
if list_scalar > current_max_list {
max = Some(scalar.clone());
}
} else {
max = Some(scalar.clone());
}
}
match (min, max) {
(Some(min), Some(max)) => Ok(Some(MinMaxResult { min, max })),
(None, None) => Ok(None),
_ => unreachable!("min and max should be set together or both remain None"),
}
}
}

Expand Down
Loading