Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datafusion-testing
5 changes: 5 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,11 @@ config_namespace! {
/// By default, `nulls_max` is used to follow Postgres's behavior.
/// postgres rule: <https://www.postgresql.org/docs/current/queries-order.html>
pub default_null_ordering: String, default = "nulls_max".to_string()

/// When set to true, the SQL types `INTEGER`, `INT`, and `INT4` are mapped
/// to `Int64` (BIGINT) instead of the default `Int32`. This is useful for
/// SQLite compatibility, where `INTEGER` is a 64-bit type.
pub integer_type_is_bigint: bool, default = false
}
}

Expand Down
1 change: 1 addition & 0 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,7 @@ impl SessionState {
.default_null_ordering
.as_str()
.into(),
integer_type_is_bigint: sql_parser_options.integer_type_is_bigint,
}
}

Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/src/physical_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3176,7 +3176,7 @@ mod tests {
// verify that the plan correctly casts u8 to i64
// the cast from u8 to i64 for literal will be simplified, and get lit(int64(5))
// the cast here is implicit so has CastOptions with safe=true
let expected = r#"BinaryExpr { left: Column { name: "c7", index: 2 }, op: Lt, right: Literal { value: Int64(5), field: Field { name: "lit", data_type: Int64 } }, fail_on_overflow: false"#;
let expected = r#"BinaryExpr { left: Column { name: "c7", index: 2 }, op: Lt, right: Literal { value: Int64(5), field: Field { name: "lit", data_type: Int64 } }, fail_on_overflow: true"#;

assert_contains!(format!("{exec_plan:?}"), expected);
Ok(())
Expand Down Expand Up @@ -3578,7 +3578,7 @@ mod tests {
let execution_plan = plan(&logical_plan).await?;
// verify that the plan correctly adds cast from Int64(1) to Utf8, and the const will be evaluated.

let expected = r#"expr: BinaryExpr { left: BinaryExpr { left: Column { name: "c1", index: 0 }, op: Eq, right: Literal { value: Utf8("a"), field: Field { name: "lit", data_type: Utf8 } }, fail_on_overflow: false }"#;
let expected = r#"expr: BinaryExpr { left: BinaryExpr { left: Column { name: "c1", index: 0 }, op: Eq, right: Literal { value: Utf8("a"), field: Field { name: "lit", data_type: Utf8 } }, fail_on_overflow: true }"#;

assert_contains!(format!("{execution_plan:?}"), expected);

Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-expr/src/expressions/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ impl BinaryExpr {
left,
op,
right,
fail_on_overflow: false,
fail_on_overflow: true,
}
}

Expand Down
4 changes: 2 additions & 2 deletions datafusion/physical-expr/src/expressions/dynamic_filters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -498,14 +498,14 @@ mod test {
)
.unwrap();
let snap = dynamic_filter_1.snapshot().unwrap().unwrap();
insta::assert_snapshot!(format!("{snap:?}"), @r#"BinaryExpr { left: Column { name: "a", index: 0 }, op: Eq, right: Literal { value: Int32(42), field: Field { name: "lit", data_type: Int32 } }, fail_on_overflow: false }"#);
insta::assert_snapshot!(format!("{snap:?}"), @r#"BinaryExpr { left: Column { name: "a", index: 0 }, op: Eq, right: Literal { value: Int32(42), field: Field { name: "lit", data_type: Int32 } }, fail_on_overflow: true }"#);
let dynamic_filter_2 = reassign_expr_columns(
Arc::clone(&dynamic_filter) as Arc<dyn PhysicalExpr>,
&filter_schema_2,
)
.unwrap();
let snap = dynamic_filter_2.snapshot().unwrap().unwrap();
insta::assert_snapshot!(format!("{snap:?}"), @r#"BinaryExpr { left: Column { name: "a", index: 1 }, op: Eq, right: Literal { value: Int32(42), field: Field { name: "lit", data_type: Int32 } }, fail_on_overflow: false }"#);
insta::assert_snapshot!(format!("{snap:?}"), @r#"BinaryExpr { left: Column { name: "a", index: 1 }, op: Eq, right: Literal { value: Int32(42), field: Field { name: "lit", data_type: Int32 } }, fail_on_overflow: true }"#);
// Both filters allow evaluating the same expression
let batch_1 = RecordBatch::try_new(
Arc::clone(&filter_schema_1),
Expand Down
17 changes: 16 additions & 1 deletion datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ pub struct ParserOptions {
pub map_string_types_to_utf8view: bool,
/// Default null ordering for sorting expressions.
pub default_null_ordering: NullOrdering,
/// When true, `INTEGER`/`INT`/`INT4` SQL types map to `Int64` instead of `Int32`.
/// Useful for SQLite compatibility where INTEGER is a 64-bit type.
pub integer_type_is_bigint: bool,
}

impl ParserOptions {
Expand All @@ -81,6 +84,7 @@ impl ParserOptions {
// By default, `nulls_max` is used to follow Postgres's behavior.
// postgres rule: https://www.postgresql.org/docs/current/queries-order.html
default_null_ordering: NullOrdering::NullsMax,
integer_type_is_bigint: false,
}
}

Expand Down Expand Up @@ -141,6 +145,12 @@ impl ParserOptions {
self.default_null_ordering = value;
self
}

/// Sets the `integer_type_is_bigint` option.
///
/// When enabled, the SQL types `INTEGER`/`INT`/`INT4` are planned as
/// `Int64` instead of `Int32` (useful for SQLite compatibility).
pub fn with_integer_type_is_bigint(mut self, enabled: bool) -> Self {
    self.integer_type_is_bigint = enabled;
    self
}
}

impl Default for ParserOptions {
Expand All @@ -160,6 +170,7 @@ impl From<&SqlParserOptions> for ParserOptions {
.enable_options_value_normalization,
collect_spans: options.collect_spans,
default_null_ordering: options.default_null_ordering.as_str().into(),
integer_type_is_bigint: options.integer_type_is_bigint,
}
}
}
Expand Down Expand Up @@ -663,7 +674,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLDataType::TinyInt(_) => Ok(DataType::Int8),
SQLDataType::SmallInt(_) | SQLDataType::Int2(_) => Ok(DataType::Int16),
SQLDataType::Int(_) | SQLDataType::Integer(_) | SQLDataType::Int4(_) => {
Ok(DataType::Int32)
if self.options.integer_type_is_bigint {
Ok(DataType::Int64)
} else {
Ok(DataType::Int32)
}
}
SQLDataType::BigInt(_) | SQLDataType::Int8(_) => Ok(DataType::Int64),
SQLDataType::TinyIntUnsigned(_) => Ok(DataType::UInt8),
Expand Down
3 changes: 3 additions & 0 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3472,6 +3472,7 @@ fn parse_decimals_parser_options() -> ParserOptions {
enable_options_value_normalization: false,
collect_spans: false,
default_null_ordering: NullOrdering::NullsMax,
integer_type_is_bigint: false,
}
}

Expand All @@ -3484,6 +3485,7 @@ fn ident_normalization_parser_options_no_ident_normalization() -> ParserOptions
enable_options_value_normalization: false,
collect_spans: false,
default_null_ordering: NullOrdering::NullsMax,
integer_type_is_bigint: false,
}
}

Expand All @@ -3496,6 +3498,7 @@ fn ident_normalization_parser_options_ident_normalization() -> ParserOptions {
enable_options_value_normalization: false,
collect_spans: false,
default_null_ordering: NullOrdering::NullsMax,
integer_type_is_bigint: false,
}
}

Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ object_store = { workspace = true }
postgres-types = { version = "0.2.13", features = ["derive", "with-chrono-0_4"], optional = true }
# When updating the following dependency verify that sqlite test file regeneration works correctly
# by running the regenerate_sqlite_files.sh script.
regex = { workspace = true }
sqllogictest = "0.29.1"
sqlparser = { workspace = true }
tempfile = { workspace = true }
Expand All @@ -83,7 +84,6 @@ substrait = ["datafusion-substrait"]

[dev-dependencies]
env_logger = { workspace = true }
regex = { workspace = true }
tokio = { workspace = true, features = ["rt-multi-thread"] }

[[test]]
Expand Down
11 changes: 10 additions & 1 deletion datafusion/sqllogictest/src/test_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ use datafusion::{
use datafusion_spark::SessionStateBuilderSpark;

use crate::is_spark_path;
use crate::is_sqlite_path;
use async_trait::async_trait;
use datafusion::common::cast::as_float64_array;
use datafusion::execution::SessionStateBuilder;
Expand Down Expand Up @@ -78,9 +79,17 @@ impl TestContext {
/// If `None` is returned (e.g. because some needed feature is not
/// enabled), the file should be skipped
pub async fn try_new_for_test_file(relative_path: &Path) -> Option<Self> {
let config = SessionConfig::new()
let mut config = SessionConfig::new()
// hardcode target partitions so plans are deterministic
.with_target_partitions(4);

// SQLite uses 64-bit integers for the INTEGER type, whereas DataFusion
// defaults to 32-bit. Enable the bigint mapping for SQLite-compatible tests.
if is_sqlite_path(relative_path) {
config =
config.set_bool("datafusion.sql_parser.integer_type_is_bigint", true);
}

let runtime = Arc::new(RuntimeEnv::default());

let mut state_builder = SessionStateBuilder::new()
Expand Down
4 changes: 4 additions & 0 deletions datafusion/sqllogictest/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,10 @@ pub fn is_spark_path(relative_path: &Path) -> bool {
relative_path.starts_with("spark/")
}

/// Returns `true` when `relative_path` points into the `sqlite` test
/// directory (component-wise prefix check, mirroring `is_spark_path`).
pub fn is_sqlite_path(relative_path: &Path) -> bool {
    let sqlite_prefix = Path::new("sqlite");
    relative_path.starts_with(sqlite_prefix)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
6 changes: 3 additions & 3 deletions datafusion/sqllogictest/test_files/explain_analyze.slt
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ SELECT a+1, pow(a,2)
FROM generate_series(1, 100) as t1(a);
----
Plan with Metrics
01)ProjectionExec: expr=[a@0 + 1 as t1.a + Int64(1), power(CAST(a@0 AS Float64), 2) as pow(t1.a,Int64(2))], metrics=[output_rows=100, elapsed_compute=<slt:ignore>, output_bytes=1632.0 B, output_batches=1, expr_0_eval_time=<slt:ignore>, expr_1_eval_time=<slt:ignore>]
01)ProjectionExec: expr=[a@0 + 1 as t1.a + Int64(1), power(CAST(a@0 AS Float64), 2) as pow(t1.a,Int64(2))], metrics=[output_rows=100, elapsed_compute=<slt:ignore>, output_bytes=<slt:ignore>, output_batches=1, expr_0_eval_time=<slt:ignore>, expr_1_eval_time=<slt:ignore>]
<slt:ignore>

# common expressions
Expand All @@ -163,8 +163,8 @@ SELECT a+1, a+1 as another_a_plus_one
FROM generate_series(1, 100) as t1(a);
----
Plan with Metrics
01)ProjectionExec: expr=[__common_expr_1@0 as t1.a + Int64(1), __common_expr_1@0 as another_a_plus_one], metrics=[output_rows=100, elapsed_compute=<slt:ignore>, output_bytes=800.0 B, output_batches=1, expr_0_eval_time=<slt:ignore>, expr_1_eval_time=<slt:ignore>]
02)--ProjectionExec: expr=[a@0 + 1 as __common_expr_1], metrics=[output_rows=100, elapsed_compute=<slt:ignore>, output_bytes=800.0 B, output_batches=1, expr_0_eval_time=<slt:ignore>]
01)ProjectionExec: expr=[__common_expr_1@0 as t1.a + Int64(1), __common_expr_1@0 as another_a_plus_one], metrics=[output_rows=100, elapsed_compute=<slt:ignore>, output_bytes=<slt:ignore>, output_batches=1, expr_0_eval_time=<slt:ignore>, expr_1_eval_time=<slt:ignore>]
02)--ProjectionExec: expr=[a@0 + 1 as __common_expr_1], metrics=[output_rows=100, elapsed_compute=<slt:ignore>, output_bytes=<slt:ignore>, output_batches=1, expr_0_eval_time=<slt:ignore>]
<slt:ignore>

statement ok
Expand Down
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/information_schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ datafusion.sql_parser.default_null_ordering nulls_max
datafusion.sql_parser.dialect generic
datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.enable_options_value_normalization false
datafusion.sql_parser.integer_type_is_bigint false
datafusion.sql_parser.map_string_types_to_utf8view true
datafusion.sql_parser.parse_float_as_decimal false
datafusion.sql_parser.recursion_limit 50
Expand Down Expand Up @@ -481,6 +482,7 @@ datafusion.sql_parser.default_null_ordering nulls_max Specifies the default null
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
datafusion.sql_parser.integer_type_is_bigint false When set to true, the SQL types `INTEGER`, `INT`, and `INT4` are mapped to `Int64` (BIGINT) instead of the default `Int32`. This is useful for SQLite compatibility, where `INTEGER` is a 64-bit type.
datafusion.sql_parser.map_string_types_to_utf8view true If true, string types (VARCHAR, CHAR, Text, and String) are mapped to `Utf8View` during SQL planning. If false, they are mapped to `Utf8`. Default is true.
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit when parsing complex SQL Queries
Expand Down
31 changes: 31 additions & 0 deletions datafusion/sqllogictest/test_files/operator.slt
Original file line number Diff line number Diff line change
Expand Up @@ -363,3 +363,34 @@ set datafusion.explain.physical_plan_only = false;

statement ok
drop table numeric_types

############### Integer Overflow Behavior ###############
# SMALLINT (Int16) addition overflow
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 32767 \+ 2
SELECT CAST(32767 AS SMALLINT) + CAST(2 AS SMALLINT);

# SMALLINT (Int16) subtraction overflow
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: -32768 - 1
SELECT CAST(-32768 AS SMALLINT) - CAST(1 AS SMALLINT);

# SMALLINT (Int16) multiplication overflow
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 32767 \* 2
SELECT CAST(32767 AS SMALLINT) * CAST(2 AS SMALLINT);

# INT (Int32) addition overflow
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 \+ 1
SELECT CAST(2147483647 AS INT) + CAST(1 AS INT);

# BIGINT (Int64) addition overflow
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 \+ 1
SELECT CAST(9223372036854775807 AS BIGINT) + CAST(1 AS BIGINT);

# TINYINT (Int8) addition overflow
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 127 \+ 1
SELECT CAST(127 AS TINYINT) + CAST(1 AS TINYINT);

# No overflow within range - should succeed
query I
SELECT CAST(32766 AS SMALLINT) + CAST(1 AS SMALLINT);
----
32767
Loading
Loading