Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datafusion-testing
5 changes: 5 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,11 @@ config_namespace! {
/// By default, `nulls_max` is used to follow Postgres's behavior.
/// postgres rule: <https://www.postgresql.org/docs/current/queries-order.html>
pub default_null_ordering: String, default = "nulls_max".to_string()

/// When set to true, the SQL types `INTEGER`, `INT`, and `INT4` are mapped
/// to `Int64` (BIGINT) instead of the default `Int32`. This is useful for
/// SQLite compatibility, where `INTEGER` is a 64-bit type.
pub integer_type_is_bigint: bool, default = false
}
}

Expand Down
1 change: 1 addition & 0 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,7 @@ impl SessionState {
.default_null_ordering
.as_str()
.into(),
integer_type_is_bigint: sql_parser_options.integer_type_is_bigint,
}
}

Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/src/physical_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3176,7 +3176,7 @@ mod tests {
// verify that the plan correctly casts u8 to i64
// the cast from u8 to i64 for literal will be simplified, and get lit(int64(5))
// the cast here is implicit so has CastOptions with safe=true
let expected = r#"BinaryExpr { left: Column { name: "c7", index: 2 }, op: Lt, right: Literal { value: Int64(5), field: Field { name: "lit", data_type: Int64 } }, fail_on_overflow: false"#;
let expected = r#"BinaryExpr { left: Column { name: "c7", index: 2 }, op: Lt, right: Literal { value: Int64(5), field: Field { name: "lit", data_type: Int64 } }, fail_on_overflow: true"#;

assert_contains!(format!("{exec_plan:?}"), expected);
Ok(())
Expand Down Expand Up @@ -3578,7 +3578,7 @@ mod tests {
let execution_plan = plan(&logical_plan).await?;
// verify that the plan correctly adds cast from Int64(1) to Utf8, and the const will be evaluated.

let expected = r#"expr: BinaryExpr { left: BinaryExpr { left: Column { name: "c1", index: 0 }, op: Eq, right: Literal { value: Utf8("a"), field: Field { name: "lit", data_type: Utf8 } }, fail_on_overflow: false }"#;
let expected = r#"expr: BinaryExpr { left: BinaryExpr { left: Column { name: "c1", index: 0 }, op: Eq, right: Literal { value: Utf8("a"), field: Field { name: "lit", data_type: Utf8 } }, fail_on_overflow: true }"#;

assert_contains!(format!("{execution_plan:?}"), expected);

Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-expr/src/expressions/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ impl BinaryExpr {
left,
op,
right,
fail_on_overflow: false,
fail_on_overflow: true,
}
}

Expand Down
4 changes: 2 additions & 2 deletions datafusion/physical-expr/src/expressions/dynamic_filters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -498,14 +498,14 @@ mod test {
)
.unwrap();
let snap = dynamic_filter_1.snapshot().unwrap().unwrap();
insta::assert_snapshot!(format!("{snap:?}"), @r#"BinaryExpr { left: Column { name: "a", index: 0 }, op: Eq, right: Literal { value: Int32(42), field: Field { name: "lit", data_type: Int32 } }, fail_on_overflow: false }"#);
insta::assert_snapshot!(format!("{snap:?}"), @r#"BinaryExpr { left: Column { name: "a", index: 0 }, op: Eq, right: Literal { value: Int32(42), field: Field { name: "lit", data_type: Int32 } }, fail_on_overflow: true }"#);
let dynamic_filter_2 = reassign_expr_columns(
Arc::clone(&dynamic_filter) as Arc<dyn PhysicalExpr>,
&filter_schema_2,
)
.unwrap();
let snap = dynamic_filter_2.snapshot().unwrap().unwrap();
insta::assert_snapshot!(format!("{snap:?}"), @r#"BinaryExpr { left: Column { name: "a", index: 1 }, op: Eq, right: Literal { value: Int32(42), field: Field { name: "lit", data_type: Int32 } }, fail_on_overflow: false }"#);
insta::assert_snapshot!(format!("{snap:?}"), @r#"BinaryExpr { left: Column { name: "a", index: 1 }, op: Eq, right: Literal { value: Int32(42), field: Field { name: "lit", data_type: Int32 } }, fail_on_overflow: true }"#);
// Both filters allow evaluating the same expression
let batch_1 = RecordBatch::try_new(
Arc::clone(&filter_schema_1),
Expand Down
17 changes: 16 additions & 1 deletion datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ pub struct ParserOptions {
pub map_string_types_to_utf8view: bool,
/// Default null ordering for sorting expressions.
pub default_null_ordering: NullOrdering,
/// When true, `INTEGER`/`INT`/`INT4` SQL types map to `Int64` instead of `Int32`.
/// Useful for SQLite compatibility where INTEGER is a 64-bit type.
pub integer_type_is_bigint: bool,
}

impl ParserOptions {
Expand All @@ -81,6 +84,7 @@ impl ParserOptions {
// By default, `nulls_max` is used to follow Postgres's behavior.
// postgres rule: https://www.postgresql.org/docs/current/queries-order.html
default_null_ordering: NullOrdering::NullsMax,
integer_type_is_bigint: false,
}
}

Expand Down Expand Up @@ -141,6 +145,12 @@ impl ParserOptions {
self.default_null_ordering = value;
self
}

/// Sets the `integer_type_is_bigint` option.
///
/// When enabled, the SQL types `INTEGER`/`INT`/`INT4` are planned as
/// `Int64` instead of `Int32` (useful for SQLite compatibility).
pub fn with_integer_type_is_bigint(mut self, enabled: bool) -> Self {
    self.integer_type_is_bigint = enabled;
    self
}
}

impl Default for ParserOptions {
Expand All @@ -160,6 +170,7 @@ impl From<&SqlParserOptions> for ParserOptions {
.enable_options_value_normalization,
collect_spans: options.collect_spans,
default_null_ordering: options.default_null_ordering.as_str().into(),
integer_type_is_bigint: options.integer_type_is_bigint,
}
}
}
Expand Down Expand Up @@ -663,7 +674,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLDataType::TinyInt(_) => Ok(DataType::Int8),
SQLDataType::SmallInt(_) | SQLDataType::Int2(_) => Ok(DataType::Int16),
SQLDataType::Int(_) | SQLDataType::Integer(_) | SQLDataType::Int4(_) => {
Ok(DataType::Int32)
if self.options.integer_type_is_bigint {
Ok(DataType::Int64)
} else {
Ok(DataType::Int32)
}
}
SQLDataType::BigInt(_) | SQLDataType::Int8(_) => Ok(DataType::Int64),
SQLDataType::TinyIntUnsigned(_) => Ok(DataType::UInt8),
Expand Down
3 changes: 3 additions & 0 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3472,6 +3472,7 @@ fn parse_decimals_parser_options() -> ParserOptions {
enable_options_value_normalization: false,
collect_spans: false,
default_null_ordering: NullOrdering::NullsMax,
integer_type_is_bigint: false,
}
}

Expand All @@ -3484,6 +3485,7 @@ fn ident_normalization_parser_options_no_ident_normalization() -> ParserOptions
enable_options_value_normalization: false,
collect_spans: false,
default_null_ordering: NullOrdering::NullsMax,
integer_type_is_bigint: false,
}
}

Expand All @@ -3496,6 +3498,7 @@ fn ident_normalization_parser_options_ident_normalization() -> ParserOptions {
enable_options_value_normalization: false,
collect_spans: false,
default_null_ordering: NullOrdering::NullsMax,
integer_type_is_bigint: false,
}
}

Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ object_store = { workspace = true }
postgres-types = { version = "0.2.13", features = ["derive", "with-chrono-0_4"], optional = true }
# When updating the following dependency verify that sqlite test file regeneration works correctly
# by running the regenerate_sqlite_files.sh script.
regex = { workspace = true }
sqllogictest = "0.29.1"
sqlparser = { workspace = true }
tempfile = { workspace = true }
Expand All @@ -83,7 +84,6 @@ substrait = ["datafusion-substrait"]

[dev-dependencies]
env_logger = { workspace = true }
regex = { workspace = true }
tokio = { workspace = true, features = ["rt-multi-thread"] }

[[test]]
Expand Down
11 changes: 10 additions & 1 deletion datafusion/sqllogictest/src/test_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ use datafusion::{
use datafusion_spark::SessionStateBuilderSpark;

use crate::is_spark_path;
use crate::is_sqlite_path;
use async_trait::async_trait;
use datafusion::common::cast::as_float64_array;
use datafusion::execution::SessionStateBuilder;
Expand Down Expand Up @@ -78,9 +79,17 @@ impl TestContext {
/// If `None` is returned (e.g. because some needed feature is not
/// enabled), the file should be skipped
pub async fn try_new_for_test_file(relative_path: &Path) -> Option<Self> {
let config = SessionConfig::new()
let mut config = SessionConfig::new()
// hardcode target partitions so plans are deterministic
.with_target_partitions(4);

// SQLite uses 64-bit integers for the INTEGER type, whereas DataFusion
// defaults to 32-bit. Enable the bigint mapping for SQLite-compatible tests.
if is_sqlite_path(relative_path) {
config =
config.set_bool("datafusion.sql_parser.integer_type_is_bigint", true);
}

let runtime = Arc::new(RuntimeEnv::default());

let mut state_builder = SessionStateBuilder::new()
Expand Down
4 changes: 4 additions & 0 deletions datafusion/sqllogictest/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,10 @@ pub fn is_spark_path(relative_path: &Path) -> bool {
relative_path.starts_with("spark/")
}

/// Returns `true` when `relative_path` points into the `sqlite` test
/// directory (component-wise prefix check, mirroring `is_spark_path`).
pub fn is_sqlite_path(relative_path: &Path) -> bool {
    let sqlite_prefix = Path::new("sqlite");
    relative_path.starts_with(sqlite_prefix)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
6 changes: 3 additions & 3 deletions datafusion/sqllogictest/test_files/explain_analyze.slt
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ SELECT a+1, pow(a,2)
FROM generate_series(1, 100) as t1(a);
----
Plan with Metrics
01)ProjectionExec: expr=[a@0 + 1 as t1.a + Int64(1), power(CAST(a@0 AS Float64), 2) as pow(t1.a,Int64(2))], metrics=[output_rows=100, elapsed_compute=<slt:ignore>, output_bytes=1632.0 B, output_batches=1, expr_0_eval_time=<slt:ignore>, expr_1_eval_time=<slt:ignore>]
01)ProjectionExec: expr=[a@0 + 1 as t1.a + Int64(1), power(CAST(a@0 AS Float64), 2) as pow(t1.a,Int64(2))], metrics=[output_rows=100, elapsed_compute=<slt:ignore>, output_bytes=<slt:ignore>, output_batches=1, expr_0_eval_time=<slt:ignore>, expr_1_eval_time=<slt:ignore>]
<slt:ignore>

# common expressions
Expand All @@ -163,8 +163,8 @@ SELECT a+1, a+1 as another_a_plus_one
FROM generate_series(1, 100) as t1(a);
----
Plan with Metrics
01)ProjectionExec: expr=[__common_expr_1@0 as t1.a + Int64(1), __common_expr_1@0 as another_a_plus_one], metrics=[output_rows=100, elapsed_compute=<slt:ignore>, output_bytes=800.0 B, output_batches=1, expr_0_eval_time=<slt:ignore>, expr_1_eval_time=<slt:ignore>]
02)--ProjectionExec: expr=[a@0 + 1 as __common_expr_1], metrics=[output_rows=100, elapsed_compute=<slt:ignore>, output_bytes=800.0 B, output_batches=1, expr_0_eval_time=<slt:ignore>]
01)ProjectionExec: expr=[__common_expr_1@0 as t1.a + Int64(1), __common_expr_1@0 as another_a_plus_one], metrics=[output_rows=100, elapsed_compute=<slt:ignore>, output_bytes=<slt:ignore>, output_batches=1, expr_0_eval_time=<slt:ignore>, expr_1_eval_time=<slt:ignore>]
02)--ProjectionExec: expr=[a@0 + 1 as __common_expr_1], metrics=[output_rows=100, elapsed_compute=<slt:ignore>, output_bytes=<slt:ignore>, output_batches=1, expr_0_eval_time=<slt:ignore>]
<slt:ignore>

statement ok
Expand Down
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/information_schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ datafusion.sql_parser.default_null_ordering nulls_max
datafusion.sql_parser.dialect generic
datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.enable_options_value_normalization false
datafusion.sql_parser.integer_type_is_bigint false
datafusion.sql_parser.map_string_types_to_utf8view true
datafusion.sql_parser.parse_float_as_decimal false
datafusion.sql_parser.recursion_limit 50
Expand Down Expand Up @@ -481,6 +482,7 @@ datafusion.sql_parser.default_null_ordering nulls_max Specifies the default null
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
datafusion.sql_parser.integer_type_is_bigint false When set to true, the SQL types `INTEGER`, `INT`, and `INT4` are mapped to `Int64` (BIGINT) instead of the default `Int32`. This is useful for SQLite compatibility, where `INTEGER` is a 64-bit type.
datafusion.sql_parser.map_string_types_to_utf8view true If true, string types (VARCHAR, CHAR, Text, and String) are mapped to `Utf8View` during SQL planning. If false, they are mapped to `Utf8`. Default is true.
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit when parsing complex SQL Queries
Expand Down
31 changes: 31 additions & 0 deletions datafusion/sqllogictest/test_files/operator.slt
Original file line number Diff line number Diff line change
Expand Up @@ -363,3 +363,34 @@ set datafusion.explain.physical_plan_only = false;

statement ok
drop table numeric_types

############### Integer Overflow Behavior ###############
# SMALLINT (Int16) addition overflow
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 32767 \+ 2
SELECT CAST(32767 AS SMALLINT) + CAST(2 AS SMALLINT);

# SMALLINT (Int16) subtraction overflow
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: -32768 - 1
SELECT CAST(-32768 AS SMALLINT) - CAST(1 AS SMALLINT);

# SMALLINT (Int16) multiplication overflow
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 32767 \* 2
SELECT CAST(32767 AS SMALLINT) * CAST(2 AS SMALLINT);

# INT (Int32) addition overflow
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 \+ 1
SELECT CAST(2147483647 AS INT) + CAST(1 AS INT);

# BIGINT (Int64) addition overflow
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 \+ 1
SELECT CAST(9223372036854775807 AS BIGINT) + CAST(1 AS BIGINT);

# TINYINT (Int8) addition overflow
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 127 \+ 1
SELECT CAST(127 AS TINYINT) + CAST(1 AS TINYINT);

# No overflow within range - should succeed
query I
SELECT CAST(32766 AS SMALLINT) + CAST(1 AS SMALLINT);
----
32767
Loading
Loading