Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ cargo-fuzz = true
default = ["native"]
native = ["libfuzzer-sys", "zstd", "vortex-file", "vortex/files"]
wasmfuzz = []
zstd = ["vortex/zstd"]
zstd = ["vortex/zstd", "vortex-btrblocks/zstd", "vortex-btrblocks/pco"]
cuda = ["vortex-cuda", "vortex/cuda", "tokio"]

[dependencies]
Expand Down
3 changes: 1 addition & 2 deletions fuzz/fuzz_targets/file_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ use vortex_fuzz::CompressorStrategy;
use vortex_fuzz::FuzzFileAction;
use vortex_fuzz::RUNTIME;
use vortex_fuzz::SESSION;
use vortex_layout::layouts::compact::CompactCompressor;
use vortex_utils::aliases::DefaultHashBuilder;
use vortex_utils::aliases::hash_set::HashSet;

Expand Down Expand Up @@ -62,7 +61,7 @@ fuzz_target!(|fuzz: FuzzFileAction| -> Corpus {
CompressorStrategy::Default => SESSION.write_options(),
CompressorStrategy::Compact => {
let strategy = WriteStrategyBuilder::default()
.with_compressor(CompactCompressor::default())
.with_compact_encodings()
.build();
SESSION.write_options().with_strategy(strategy)
}
Expand Down
13 changes: 10 additions & 3 deletions fuzz/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -517,15 +517,22 @@ fn random_action_from_list(
/// Compress an array using the given strategy.
#[cfg(feature = "zstd")]
pub fn compress_array(array: &dyn Array, strategy: CompressorStrategy) -> ArrayRef {
use vortex_layout::layouts::compact::CompactCompressor;
use vortex_btrblocks::BtrBlocksCompressorBuilder;
use vortex_btrblocks::FloatCode;
use vortex_btrblocks::IntCode;
use vortex_btrblocks::StringCode;

match strategy {
CompressorStrategy::Default => BtrBlocksCompressor::default()
.compress(array)
.vortex_expect("BtrBlocksCompressor compress should succeed in fuzz test"),
CompressorStrategy::Compact => CompactCompressor::default()
CompressorStrategy::Compact => BtrBlocksCompressorBuilder::default()
.include_string([StringCode::Zstd])
.include_int([IntCode::Pco])
.include_float([FloatCode::Pco])
.build()
.compress(array)
.vortex_expect("CompactCompressor compress should succeed in fuzz test"),
.vortex_expect("Compact compress should succeed in fuzz test"),
}
}

Expand Down
3 changes: 1 addition & 2 deletions vortex-bench/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ use vortex::error::VortexExpect;
use vortex::error::vortex_err;
use vortex::file::VortexWriteOptions;
use vortex::file::WriteStrategyBuilder;
use vortex::layout::layouts::compact::CompactCompressor;
use vortex::utils::aliases::hash_map::HashMap;

pub mod benchmark;
Expand Down Expand Up @@ -215,7 +214,7 @@ impl CompactionStrategy {
match self {
CompactionStrategy::Compact => options.with_strategy(
WriteStrategyBuilder::default()
.with_compressor(CompactCompressor::default())
.with_compact_encodings()
.build(),
),
CompactionStrategy::Default => options,
Expand Down
5 changes: 5 additions & 0 deletions vortex-btrblocks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ enum-iterator = { workspace = true }
getrandom_v03 = { workspace = true }
itertools = { workspace = true }
num-traits = { workspace = true }
pco = { workspace = true, optional = true }
rand = { workspace = true }
rustc-hash = { workspace = true }
tracing = { workspace = true }
Expand All @@ -31,12 +32,14 @@ vortex-error = { workspace = true }
vortex-fastlanes = { workspace = true }
vortex-fsst = { workspace = true }
vortex-mask = { workspace = true }
vortex-pco = { workspace = true, optional = true }
vortex-runend = { workspace = true }
vortex-scalar = { workspace = true }
vortex-sequence = { workspace = true }
vortex-sparse = { workspace = true }
vortex-utils = { workspace = true }
vortex-zigzag = { workspace = true }
vortex-zstd = { workspace = true, optional = true }

[dev-dependencies]
divan = { workspace = true }
Expand All @@ -47,6 +50,8 @@ vortex-array = { workspace = true, features = ["_test-harness"] }
[features]
# This feature enables unstable encodings for which we don't guarantee stability.
unstable_encodings = []
pco = ["dep:pco", "dep:vortex-pco"]
zstd = ["dep:vortex-zstd"]

[lints]
workspace = true
Expand Down
18 changes: 15 additions & 3 deletions vortex-btrblocks/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,21 @@ pub struct BtrBlocksCompressorBuilder {
impl Default for BtrBlocksCompressorBuilder {
fn default() -> Self {
Self {
int_schemes: ALL_INT_SCHEMES.iter().copied().collect(),
float_schemes: ALL_FLOAT_SCHEMES.iter().copied().collect(),
string_schemes: ALL_STRING_SCHEMES.iter().copied().collect(),
int_schemes: ALL_INT_SCHEMES
.iter()
.copied()
.filter(|s| s.code() != IntCode::Pco)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is currently where we exclude pco and zstd from the default.

Comment on lines +54 to +57
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we have ALL_DEFAULT_SCHEMES next to all schemes?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's just that people will forget to omit other schemes.

.collect(),
float_schemes: ALL_FLOAT_SCHEMES
.iter()
.copied()
.filter(|s| s.code() != FloatCode::Pco)
.collect(),
string_schemes: ALL_STRING_SCHEMES
.iter()
.copied()
.filter(|s| s.code() != StringCode::Zstd)
.collect(),
}
}
}
Expand Down
34 changes: 34 additions & 0 deletions vortex-btrblocks/src/compressor/float/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ pub const ALL_FLOAT_SCHEMES: &[&dyn FloatScheme] = &[
&DictScheme,
&NullDominated,
&RLE_FLOAT_SCHEME,
#[cfg(feature = "pco")]
&PcoScheme,
];

/// [`Compressor`] for floating-point numbers.
Expand Down Expand Up @@ -142,6 +144,8 @@ pub enum FloatCode {
Rle,
/// Sparse encoding for null-dominated arrays.
Sparse,
/// Pco (pcodec) compression for floats.
Pco,
}

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
Expand All @@ -162,6 +166,11 @@ struct DictScheme;
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct NullDominated;

/// Pco (pcodec) compression for floats.
///
/// Unit marker type for the scheme; it carries no configuration of its own.
/// Only compiled when the `pco` feature is enabled.
#[cfg(feature = "pco")]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct PcoScheme;

/// Configuration for float RLE compression.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct FloatRLEConfig;
Expand Down Expand Up @@ -520,6 +529,31 @@ impl Scheme for NullDominated {
}
}

#[cfg(feature = "pco")]
/// Float scheme that encodes the source array with Pco (pcodec).
///
/// Only compiled when the `pco` feature is enabled.
impl Scheme for PcoScheme {
    type StatsType = FloatStats;
    type CodeType = FloatCode;

    /// Returns the identifier of this scheme, [`FloatCode::Pco`].
    fn code(&self) -> FloatCode {
        FloatCode::Pco
    }

    /// Encodes the source array held by `stats` as a Pco array.
    ///
    /// The compressor, context and exclusion list are not consulted.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `PcoArray::from_primitive`.
    fn compress(
        &self,
        _compressor: &BtrBlocksCompressor,
        stats: &Self::StatsType,
        _ctx: CompressorContext,
        _excludes: &[FloatCode],
    ) -> VortexResult<ArrayRef> {
        // NOTE(review): 8192 is presumably the number of values per Pco page —
        // confirm against `PcoArray::from_primitive`.
        let encoded = vortex_pco::PcoArray::from_primitive(
            stats.source(),
            pco::DEFAULT_COMPRESSION_LEVEL,
            8192,
        )?;

        Ok(encoded.into_array())
    }
}

#[cfg(test)]
mod tests {

Expand Down
52 changes: 52 additions & 0 deletions vortex-btrblocks/src/compressor/integer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ pub const ALL_INT_SCHEMES: &[&dyn IntegerScheme] = &[
&RunEndScheme,
&SequenceScheme,
&RLE_INTEGER_SCHEME,
#[cfg(feature = "pco")]
&PcoScheme,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This must not be in the default.

];

/// [`Compressor`] for signed and unsigned integers.
Expand Down Expand Up @@ -156,6 +158,8 @@ pub enum IntCode {
Sequence,
/// RLE encoding - generic run-length encoding.
Rle,
/// Pco (pcodec) compression for integers.
Pco,
}

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
Expand Down Expand Up @@ -188,6 +192,11 @@ pub struct RunEndScheme;
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct SequenceScheme;

/// Pco (pcodec) compression for integers.
///
/// Unit marker type for the scheme; it carries no configuration of its own.
/// Only compiled when the `pco` feature is enabled.
#[cfg(feature = "pco")]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct PcoScheme;

/// Threshold for the average run length in an array before we consider run-end encoding.
const RUN_END_THRESHOLD: u32 = 4;

Expand Down Expand Up @@ -818,6 +827,49 @@ impl Scheme for SequenceScheme {
}
}

#[cfg(feature = "pco")]
/// Integer scheme that encodes the source array with Pco (pcodec).
///
/// Only compiled when the `pco` feature is enabled.
impl Scheme for PcoScheme {
    type StatsType = IntegerStats;
    type CodeType = IntCode;

    /// Returns the identifier of this scheme, [`IntCode::Pco`].
    fn code(&self) -> IntCode {
        IntCode::Pco
    }

    /// Estimates the compression ratio this scheme would achieve.
    ///
    /// Returns `0.0` for 8-bit integer inputs, which Pco does not support;
    /// every other primitive type is estimated by sampling.
    fn expected_compression_ratio(
        &self,
        compressor: &BtrBlocksCompressor,
        stats: &Self::StatsType,
        ctx: CompressorContext,
        excludes: &[IntCode],
    ) -> VortexResult<f64> {
        match stats.src.ptype() {
            // Pco does not support I8 or U8.
            vortex_dtype::PType::I8 | vortex_dtype::PType::U8 => Ok(0.0),
            _ => self.estimate_compression_ratio_with_sampling(compressor, stats, ctx, excludes),
        }
    }

    /// Encodes the source array held by `stats` as a Pco array.
    ///
    /// The compressor, context and exclusion list are not consulted.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `PcoArray::from_primitive`.
    fn compress(
        &self,
        _compressor: &BtrBlocksCompressor,
        stats: &Self::StatsType,
        _ctx: CompressorContext,
        _excludes: &[IntCode],
    ) -> VortexResult<ArrayRef> {
        // NOTE(review): 8192 is presumably the number of values per Pco page —
        // confirm against `PcoArray::from_primitive`.
        let encoded = vortex_pco::PcoArray::from_primitive(
            stats.source(),
            pco::DEFAULT_COMPRESSION_LEVEL,
            8192,
        )?;

        Ok(encoded.into_array())
    }
}

#[cfg(test)]
mod tests {
use std::iter;
Expand Down
32 changes: 32 additions & 0 deletions vortex-btrblocks/src/compressor/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ pub const ALL_STRING_SCHEMES: &[&dyn StringScheme] = &[
&FSSTScheme,
&ConstantScheme,
&NullDominated,
#[cfg(feature = "zstd")]
&ZstdScheme,
];

/// [`Compressor`] for strings.
Expand Down Expand Up @@ -209,6 +211,11 @@ pub struct ConstantScheme;
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct NullDominated;

/// Zstd compression without dictionaries (nvCOMP compatible).
///
/// Unit marker type for the scheme; it carries no configuration of its own.
/// Only compiled when the `zstd` feature is enabled.
#[cfg(feature = "zstd")]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct ZstdScheme;

/// Unique identifier for string compression schemes.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Sequence, Ord, PartialOrd)]
pub enum StringCode {
Expand All @@ -222,6 +229,8 @@ pub enum StringCode {
Constant,
/// Sparse encoding for null-dominated arrays.
Sparse,
/// Zstd compression without dictionaries.
Zstd,
}

impl Scheme for UncompressedScheme {
Expand Down Expand Up @@ -502,6 +511,29 @@ impl Scheme for NullDominated {
}
}

#[cfg(feature = "zstd")]
/// String scheme that encodes the source array with Zstd, without a dictionary.
///
/// Only compiled when the `zstd` feature is enabled.
impl Scheme for ZstdScheme {
    type StatsType = StringStats;
    type CodeType = StringCode;

    /// Returns the identifier of this scheme, [`StringCode::Zstd`].
    fn code(&self) -> StringCode {
        StringCode::Zstd
    }

    /// Encodes the source array held by `stats` as a dictionary-less Zstd array.
    ///
    /// The compressor, context and exclusion list are not consulted.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by
    /// `ZstdArray::from_var_bin_view_without_dict`.
    fn compress(
        &self,
        _compressor: &BtrBlocksCompressor,
        stats: &Self::StatsType,
        _ctx: CompressorContext,
        _excludes: &[StringCode],
    ) -> VortexResult<ArrayRef> {
        // NOTE(review): `3` is presumably the Zstd compression level and `8192`
        // the number of values per frame — confirm against
        // `ZstdArray::from_var_bin_view_without_dict`.
        let encoded =
            vortex_zstd::ZstdArray::from_var_bin_view_without_dict(stats.source(), 3, 8192)?;

        Ok(encoded.into_array())
    }
}

#[cfg(test)]
mod tests {
use vortex_array::arrays::VarBinViewArray;
Expand Down
3 changes: 2 additions & 1 deletion vortex-file/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ tracing = { workspace = true }
uuid = { workspace = true } # Needed to pickup the "js" feature for wasm targets from the workspace configuration
vortex-alp = { workspace = true }
vortex-array = { workspace = true }
vortex-btrblocks = { workspace = true }
vortex-buffer = { workspace = true }
vortex-bytebool = { workspace = true }

Expand Down Expand Up @@ -78,7 +79,7 @@ tokio = [
"vortex-io/tokio",
"vortex-layout/tokio",
]
zstd = ["dep:vortex-zstd", "vortex-layout/zstd"]
zstd = ["dep:vortex-zstd", "vortex-btrblocks/zstd", "vortex-btrblocks/pco"]

[package.metadata.cargo-machete]
ignored = ["getrandom_v03", "uuid"]
Loading
Loading