Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 116 additions & 24 deletions vortex-duckdb/src/datasource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -594,8 +594,6 @@ struct ProjectionWithVirtualColumns {
file_row_number_column_pos: Option<usize>,
}

/// Creates a projection expression from raw projection/column ID slices and
/// column names.
fn extract_projection_expr(
projection_ids: Option<&[u64]>,
column_ids: &[u64],
Expand All @@ -610,33 +608,53 @@ fn extract_projection_expr(

let mut file_index_column_pos = None;
let mut file_row_number_column_pos = None;
let mut is_star = true;
let mut real_column_count = 0;

for (column_pos, &column_id) in ids.iter().enumerate() {
#[expect(clippy::cast_possible_truncation)]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not this please

let column_id = if has_projection_ids {
column_ids[column_id as usize]
} else {
column_id
};

#[expect(clippy::cast_possible_truncation)]
let names = ids
.iter()
.enumerate()
.map(|(column_pos, &column_id)| {
let column_id = if has_projection_ids {
column_ids[column_id as usize]
} else {
column_id
};
if column_id == FILE_INDEX_COLUMN_IDX {
file_index_column_pos = Some(column_pos);
continue;
}
if column_id == FILE_ROW_NUMBER_COLUMN_IDX {
file_row_number_column_pos = Some(column_pos);
continue;
}

if column_id == FILE_INDEX_COLUMN_IDX {
file_index_column_pos = Some(column_pos);
}
if column_id == FILE_ROW_NUMBER_COLUMN_IDX {
file_row_number_column_pos = Some(column_pos);
}
is_star &= column_id == real_column_count;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

explain this.

real_column_count += 1;
}
is_star &= real_column_count == column_fields.len() as u64;

column_id
})
.filter(|&col_id| !is_virtual_column(col_id))
.map(|col_id| Arc::from(column_fields[col_id as usize].name.as_str()))
.collect::<FieldNames>();
let select = if is_star {
root()
} else {
#[expect(clippy::cast_possible_truncation)]
let names = ids
.iter()
.map(|&column_id| {
if has_projection_ids {
column_ids[column_id as usize]
} else {
column_id
}
})
.filter(|&col_id| !is_virtual_column(col_id))
.map(|col_id| Arc::from(column_fields[col_id as usize].name.as_str()))
.collect::<FieldNames>();

select(names, root())
};

// file_index column will be filled later when exporting the chunk.
let select = select(names, root());

let projection = if file_row_number_column_pos.is_some() {
// row_idx will be rearranged to correct position in scan(), prepend
// here
Expand Down Expand Up @@ -723,7 +741,20 @@ mod tests {
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering::Relaxed;

use vortex::dtype::DType;
use vortex::dtype::PType;
use vortex::expr::cast;
use vortex::expr::merge;
use vortex::expr::pack;
use vortex::expr::root;
use vortex::layout::layouts::row_idx::row_idx;

use super::progress;
use crate::datasource::DuckdbField;
use crate::datasource::FILE_INDEX_COLUMN_IDX;
use crate::datasource::FILE_ROW_NUMBER_COLUMN_IDX;
use crate::datasource::extract_projection_expr;
use crate::duckdb::LogicalType;

#[test]
fn test_table_scan_progress() {
Expand All @@ -738,4 +769,65 @@ mod tests {
bytes_total.fetch_add(100, Relaxed);
assert!((progress(&bytes_read, &bytes_total) - 50.).abs() < f64::EPSILON);
}

#[test]
fn test_select_star() {
let ids = [0, 1, 2];
let fields = [
DuckdbField {
name: "".to_owned(),
logical_type: LogicalType::null(),
dtype: DType::Null,
},
DuckdbField {
name: "".to_owned(),
logical_type: LogicalType::null(),
dtype: DType::Null,
},
DuckdbField {
name: "".to_owned(),
logical_type: LogicalType::null(),
dtype: DType::Null,
},
];

assert_eq!(
extract_projection_expr(None, &ids, &fields).projection,
root()
);

let ids = [FILE_ROW_NUMBER_COLUMN_IDX, 0, 1, FILE_INDEX_COLUMN_IDX, 2];
let exprs = extract_projection_expr(None, &ids, &fields);
let row_idx = cast(row_idx(), DType::Primitive(PType::I64, false.into()));
let row_idx_struct = pack([("file_row_number", row_idx)], false.into());
let root_with_virtual_cols = merge([row_idx_struct, root()]);

assert_eq!(exprs.projection, root_with_virtual_cols);
assert_eq!(exprs.file_index_column_pos, Some(3));
assert_eq!(exprs.file_row_number_column_pos, Some(0));

// projections can't be set in SELECT *.
assert_ne!(
extract_projection_expr(Some(&[0, 1]), &ids, &fields).projection,
root()
);

let ids = [0, 1];
assert_ne!(
extract_projection_expr(None, &ids, &fields).projection,
root()
);

let ids = [0, 2, 2];
assert_ne!(
extract_projection_expr(None, &ids, &fields).projection,
root()
);

let ids = [2, 1, 0];
assert_ne!(
extract_projection_expr(None, &ids, &fields).projection,
root()
);
}
}
2 changes: 1 addition & 1 deletion vortex-duckdb/src/duckdb/table_function/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ impl<'a, T: TableFunction> TableInitInput<'a, T> {
}

pub fn projection_ids(&self) -> Option<&[u64]> {
if self.input.projection_ids.is_null() {
if self.input.projection_ids.is_null() || self.input.projection_ids_count == 0 {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is unexpected

return None;
}
Some(unsafe {
Expand Down
16 changes: 13 additions & 3 deletions vortex-sqllogictest/slt/duckdb/file_index.slt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ SELECT file_index, str FROM '$__TEST_DIR__/file_index_2.vortex';
0 3

query TB
SELECT *, file_index < 2 FROM '$__TEST_DIR__/*.vortex'
SELECT *, file_index < 2 FROM '$__TEST_DIR__/file_index_*.vortex'
ORDER BY str;
----
1 true
Expand All @@ -36,14 +36,24 @@ ORDER BY str;
Hello true
Hi true

query BT
SELECT file_index < 2, * FROM '$__TEST_DIR__/file_index_*.vortex'
ORDER BY str;
----
true 1
true 2
true 3
true Hello
true Hi

query IB
SELECT count(*) AS cnt, sum(file_index) <= 3 FROM '$__TEST_DIR__/*.vortex'
SELECT count(*) AS cnt, sum(file_index) <= 3 FROM '$__TEST_DIR__/file_index_*.vortex'
ORDER BY cnt;
----
5 true

query B
SELECT file_index < 2 FROM '$__TEST_DIR__/*.vortex'
SELECT file_index < 2 FROM '$__TEST_DIR__/file_index_*.vortex'
WHERE len(str) > 1;
----
true
Expand Down
Loading