diff --git a/Cargo.lock b/Cargo.lock index d35cbc8bdad..ef9472b3416 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -145,6 +145,35 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "apache-avro" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf" +dependencies = [ + "bigdecimal", + "bon", + "bzip2", + "crc32fast", + "digest", + "liblzma", + "log", + "miniz_oxide", + "num-bigint", + "quad-rand", + "rand 0.9.2", + "regex-lite", + "serde", + "serde_bytes", + "serde_json", + "snap", + "strum 0.27.2", + "strum_macros 0.27.2", + "thiserror 2.0.18", + "uuid", + "zstd", +] + [[package]] name = "approx" version = "0.5.1" @@ -154,6 +183,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ar_archive_writer" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" +dependencies = [ + "object", +] + [[package]] name = "arbitrary" version = "1.4.2" @@ -446,6 +484,7 @@ dependencies = [ "arrow-select 57.2.0", "flatbuffers", "lz4_flex 0.12.0", + "zstd", ] [[package]] @@ -1258,6 +1297,7 @@ dependencies = [ "num-bigint", "num-integer", "num-traits", + "serde", ] [[package]] @@ -1487,7 +1527,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c6d47a4e2961fb8721bcfc54feae6455f2f64e7054f9bc67e875f0e77f4c58d" dependencies = [ "rust_decimal", - "schemars", + "schemars 1.2.0", "serde", "utf8-width", ] @@ -1775,7 +1815,7 @@ checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681" dependencies = [ "serde", "termcolor", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -1899,7 +1939,7 @@ checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" 
dependencies = [ "strum 0.26.3", "strum_macros 0.26.4", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -1945,9 +1985,13 @@ version = "0.4.36" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a" dependencies = [ + "bzip2", "compression-core", "flate2", + "liblzma", "memchr", + "zstd", + "zstd-safe", ] [[package]] @@ -1992,7 +2036,7 @@ dependencies = [ "encode_unicode", "libc", "once_cell", - "unicode-width", + "unicode-width 0.2.2", "windows-sys 0.61.2", ] @@ -2443,6 +2487,7 @@ dependencies = [ "arrow-schema 57.2.0", "async-trait", "bytes", + "bzip2", "chrono", "datafusion-catalog 52.1.0", "datafusion-catalog-listing 52.1.0", @@ -2450,6 +2495,7 @@ dependencies = [ "datafusion-common-runtime 52.1.0", "datafusion-datasource 52.1.0", "datafusion-datasource-arrow", + "datafusion-datasource-avro", "datafusion-datasource-csv 52.1.0", "datafusion-datasource-json 52.1.0", "datafusion-datasource-parquet", @@ -2469,8 +2515,10 @@ dependencies = [ "datafusion-physical-plan 52.1.0", "datafusion-session 52.1.0", "datafusion-sql 52.1.0", + "flate2", "futures", "itertools 0.14.0", + "liblzma", "log", "object_store", "parking_lot", @@ -2482,6 +2530,7 @@ dependencies = [ "tokio", "url", "uuid", + "zstd", ] [[package]] @@ -2636,6 +2685,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3237a6ff0d2149af4631290074289cae548c9863c885d821315d54c6673a074a" dependencies = [ "ahash 0.8.12", + "apache-avro", "arrow 57.2.0", "arrow-ipc 57.2.0", "chrono", @@ -2647,6 +2697,7 @@ dependencies = [ "object_store", "parquet 57.2.0", "paste", + "recursive", "sqlparser 0.59.0", "tokio", "web-time", @@ -2710,8 +2761,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b2a6be734cc3785e18bbf2a7f2b22537f6b9fb960d79617775a51568c281842" dependencies = [ "arrow 57.2.0", + "async-compression", "async-trait", "bytes", + "bzip2", "chrono", 
"datafusion-common 52.1.0", "datafusion-common-runtime 52.1.0", @@ -2722,14 +2775,18 @@ dependencies = [ "datafusion-physical-expr-common 52.1.0", "datafusion-physical-plan 52.1.0", "datafusion-session 52.1.0", + "flate2", "futures", "glob", "itertools 0.14.0", + "liblzma", "log", "object_store", "rand 0.9.2", "tokio", + "tokio-util", "url", + "zstd", ] [[package]] @@ -2756,6 +2813,26 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-datasource-avro" +version = "52.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "828088c2fb681cc0e06fb42f541f76c82a0c10278f9fd6334e22c8d1e3574ee7" +dependencies = [ + "apache-avro", + "arrow 57.2.0", + "async-trait", + "bytes", + "datafusion-common 52.1.0", + "datafusion-datasource 52.1.0", + "datafusion-physical-expr-common 52.1.0", + "datafusion-physical-plan 52.1.0", + "datafusion-session 52.1.0", + "futures", + "num-traits", + "object_store", +] + [[package]] name = "datafusion-datasource-csv" version = "50.3.0" @@ -2973,6 +3050,7 @@ dependencies = [ "indexmap", "itertools 0.14.0", "paste", + "recursive", "serde_json", "sqlparser 0.59.0", ] @@ -3041,6 +3119,8 @@ dependencies = [ "arrow 57.2.0", "arrow-buffer 57.2.0", "base64", + "blake2", + "blake3", "chrono", "chrono-tz", "datafusion-common 52.1.0", @@ -3052,9 +3132,11 @@ dependencies = [ "hex", "itertools 0.14.0", "log", + "md-5", "num-traits", "rand 0.9.2", "regex", + "sha2", "unicode-segmentation", "uuid", ] @@ -3316,6 +3398,7 @@ dependencies = [ "indexmap", "itertools 0.14.0", "log", + "recursive", "regex", "regex-syntax", ] @@ -3363,6 +3446,7 @@ dependencies = [ "parking_lot", "paste", "petgraph 0.8.3", + "recursive", "tokio", ] @@ -3462,6 +3546,7 @@ dependencies = [ "datafusion-physical-plan 52.1.0", "datafusion-pruning 52.1.0", "itertools 0.14.0", + "recursive", ] [[package]] @@ -3599,6 +3684,29 @@ dependencies = [ "parking_lot", ] +[[package]] +name = "datafusion-spark" +version = "52.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "556c431f5f2259620c8223254c0ef57aa9a85c576d4da0166157260f71eb0e25" +dependencies = [ + "arrow 57.2.0", + "bigdecimal", + "chrono", + "crc32fast", + "datafusion-catalog 52.1.0", + "datafusion-common 52.1.0", + "datafusion-execution 52.1.0", + "datafusion-expr 52.1.0", + "datafusion-functions 52.1.0", + "datafusion-functions-nested 52.1.0", + "log", + "percent-encoding", + "rand 0.9.2", + "sha1", + "url", +] + [[package]] name = "datafusion-sql" version = "50.3.0" @@ -3628,10 +3736,58 @@ dependencies = [ "datafusion-expr 52.1.0", "indexmap", "log", + "recursive", "regex", "sqlparser 0.59.0", ] +[[package]] +name = "datafusion-sqllogictest" +version = "52.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d388fec80647198ae041d314dd7d9e2305207836ecec3ad48908eac6844cdef" +dependencies = [ + "arrow 57.2.0", + "async-trait", + "bigdecimal", + "clap", + "datafusion 52.1.0", + "datafusion-spark", + "datafusion-substrait", + "futures", + "half", + "indicatif", + "itertools 0.14.0", + "log", + "object_store", + "sqllogictest", + "sqlparser 0.59.0", + "tempfile", + "thiserror 2.0.18", + "tokio", +] + +[[package]] +name = "datafusion-substrait" +version = "52.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6042adacd0bd64e56c22f6a7f9ce0ce1793dd367c899d868179d029f110d9215" +dependencies = [ + "async-recursion", + "async-trait", + "chrono", + "datafusion 52.1.0", + "half", + "itertools 0.14.0", + "object_store", + "pbjson-types", + "prost 0.14.3", + "substrait", + "tokio", + "url", + "uuid", +] + [[package]] name = "deepsize" version = "0.2.0" @@ -3832,6 +3988,18 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "educe" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1d7bc049e1bd8cdeb31b68bbd586a9464ecf9f3944af3958a7a9d0f8b9799417" +dependencies = [ + "enum-ordinalize", + "proc-macro2", + "quote", + "syn 2.0.114", +] + [[package]] name = "either" version = "1.15.0" @@ -3873,6 +4041,26 @@ dependencies = [ "syn 2.0.114", ] +[[package]] +name = "enum-ordinalize" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1091a7bb1f8f2c4b28f1fe2cef4980ca2d410a3d727d67ecc3178c9b0800f0" +dependencies = [ + "enum-ordinalize-derive", +] + +[[package]] +name = "enum-ordinalize-derive" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + [[package]] name = "env_filter" version = "0.1.4" @@ -3918,6 +4106,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "escape8259" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5692dd7b5a1978a5aeb0ce83b7655c58ca8efdcb79d21036ea249da95afec2c6" + [[package]] name = "ethnum" version = "1.5.2" @@ -4156,6 +4350,15 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs-err" +version = "3.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf68cef89750956493a66a10f512b9e58d9db21f2a573c079c0bdf1207a54a7" +dependencies = [ + "autocfg", +] + [[package]] name = "fs4" version = "0.8.4" @@ -4874,7 +5077,7 @@ dependencies = [ "console 0.16.2", "futures-core", "portable-atomic", - "unicode-width", + "unicode-width 0.2.2", "unit-prefix", "web-time", ] @@ -5820,6 +6023,18 @@ dependencies = [ "redox_syscall 0.7.0", ] +[[package]] +name = "libtest-mimic" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33" +dependencies = [ + "anstream", + "anstyle", + "clap", + 
"escape8259", +] + [[package]] name = "line-clipping" version = "0.3.5" @@ -6383,6 +6598,7 @@ checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", + "serde", ] [[package]] @@ -6527,6 +6743,15 @@ dependencies = [ "objc2-core-foundation", ] +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + [[package]] name = "object_store" version = "0.12.5" @@ -6816,6 +7041,12 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "owo-colors" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c6901729fa79e91a0913333229e9ca5dc725089d1c363b2f4b4760709dc4a52" + [[package]] name = "papergrid" version = "0.17.0" @@ -6824,7 +7055,7 @@ checksum = "6978128c8b51d8f4080631ceb2302ab51e32cc6e8615f735ee2f83fd269ae3f1" dependencies = [ "bytecount", "fnv", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -6972,6 +7203,43 @@ dependencies = [ "stfu8", ] +[[package]] +name = "pbjson" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "898bac3fa00d0ba57a4e8289837e965baa2dee8c3749f3b11d45a64b4223d9c3" +dependencies = [ + "base64", + "serde", +] + +[[package]] +name = "pbjson-build" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af22d08a625a2213a78dbb0ffa253318c5c79ce3133d32d296655a7bdfb02095" +dependencies = [ + "heck", + "itertools 0.14.0", + "prost 0.14.3", + "prost-types 0.14.3", +] + +[[package]] +name = "pbjson-types" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e748e28374f10a330ee3bb9f29b828c0ac79831a32bab65015ad9b661ead526" +dependencies = [ + "bytes", + "chrono", + "pbjson", + "pbjson-build", + "prost 0.14.3", + "prost-build 
0.14.3", + "serde", +] + [[package]] name = "pbkdf2" version = "0.12.2" @@ -7538,6 +7806,16 @@ dependencies = [ "prost 0.14.3", ] +[[package]] +name = "psm" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa96cb91275ed31d6da3e983447320c4eb219ac180fa1679a0889ff32861e2d" +dependencies = [ + "ar_archive_writer", + "cc", +] + [[package]] name = "ptr_meta" version = "0.1.4" @@ -7678,6 +7956,12 @@ dependencies = [ "url", ] +[[package]] +name = "quad-rand" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" + [[package]] name = "quick-xml" version = "0.37.5" @@ -7934,7 +8218,7 @@ dependencies = [ "thiserror 2.0.18", "unicode-segmentation", "unicode-truncate", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -7985,7 +8269,7 @@ dependencies = [ "strum 0.27.2", "time", "unicode-segmentation", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -8014,6 +8298,26 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.114", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -8098,6 +8402,16 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "regress" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2057b2325e68a893284d1538021ab90279adac1139957ca2a74426c6f118fb48" +dependencies = [ + "hashbrown 0.16.1", + "memchr", +] + [[package]] name = "relative-path" version = "1.9.3" @@ -8499,6 +8813,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "schemars" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +dependencies = [ + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + [[package]] name = "schemars" version = "1.2.0" @@ -8511,6 +8837,18 @@ dependencies = [ "serde_json", ] +[[package]] +name = "schemars_derive" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.114", +] + [[package]] name = "scoped-tls" version = "1.0.1" @@ -8587,6 +8925,10 @@ name = "semver" version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +dependencies = [ + "serde", + "serde_core", +] [[package]] name = "seq-macro" @@ -8614,6 +8956,16 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_bytes" +version = "0.11.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" +dependencies = [ + "serde", + "serde_core", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -8634,6 +8986,17 @@ dependencies = [ "syn 2.0.114", ] +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + [[package]] name = "serde_json" version = "1.0.149" 
@@ -8676,6 +9039,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_tokenstream" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64060d864397305347a78851c51588fd283767e7e7589829e8121d65512340f1" +dependencies = [ + "proc-macro2", + "quote", + "serde", + "syn 2.0.114", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -8688,6 +9063,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "sha1" version = "0.10.6" @@ -8935,6 +9323,31 @@ dependencies = [ "der", ] +[[package]] +name = "sqllogictest" +version = "0.28.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3566426f72a13e393aa34ca3d542c5b0eb86da4c0db137ee9b5cfccc6179e52d" +dependencies = [ + "async-trait", + "educe", + "fs-err", + "futures", + "glob", + "humantime", + "itertools 0.13.0", + "libtest-mimic", + "md-5", + "owo-colors", + "rand 0.8.5", + "regex", + "similar", + "subst", + "tempfile", + "thiserror 2.0.18", + "tracing", +] + [[package]] name = "sqlparser" version = "0.58.0" @@ -8952,6 +9365,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", + "recursive", "sqlparser_derive", ] @@ -8972,6 +9386,19 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "stacker" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +dependencies = [ + "cc", + "cfg-if", + 
"libc", + "psm", + "windows-sys 0.59.0", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -9049,6 +9476,41 @@ dependencies = [ "syn 2.0.114", ] +[[package]] +name = "subst" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a9a86e5144f63c2d18334698269a8bfae6eece345c70b64821ea5b35054ec99" +dependencies = [ + "memchr", + "unicode-width 0.1.14", +] + +[[package]] +name = "substrait" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62fc4b483a129b9772ccb9c3f7945a472112fdd9140da87f8a4e7f1d44e045d0" +dependencies = [ + "heck", + "pbjson", + "pbjson-build", + "pbjson-types", + "prettyplease", + "prost 0.14.3", + "prost-build 0.14.3", + "prost-types 0.14.3", + "regress", + "schemars 0.8.22", + "semver", + "serde", + "serde_json", + "serde_yaml", + "syn 2.0.114", + "typify", + "walkdir", +] + [[package]] name = "subtle" version = "2.6.1" @@ -9483,7 +9945,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f8daae29995a24f65619e19d8d31dea5b389f3d853d8bf297bbf607cd0014cc" dependencies = [ - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -9962,6 +10424,53 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "typify" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5bcc6f62eb1fa8aa4098f39b29f93dcb914e17158b76c50360911257aa629" +dependencies = [ + "typify-impl", + "typify-macro", +] + +[[package]] +name = "typify-impl" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1eb359f7ffa4f9ebe947fa11a1b2da054564502968db5f317b7e37693cb2240" +dependencies = [ + "heck", + "log", + "proc-macro2", + "quote", + "regress", + "schemars 0.8.22", + "semver", + "serde", + "serde_json", + "syn 
2.0.114", + "thiserror 2.0.18", + "unicode-ident", +] + +[[package]] +name = "typify-macro" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "911c32f3c8514b048c1b228361bebb5e6d73aeec01696e8cc0e82e2ffef8ab7a" +dependencies = [ + "proc-macro2", + "quote", + "schemars 0.8.22", + "semver", + "serde", + "serde_json", + "serde_tokenstream", + "syn 2.0.114", + "typify-impl", +] + [[package]] name = "ucd-trie" version = "0.1.7" @@ -10007,9 +10516,15 @@ checksum = "16b380a1238663e5f8a691f9039c73e1cdae598a30e9855f541d29b08b53e9a5" dependencies = [ "itertools 0.14.0", "unicode-segmentation", - "unicode-width", + "unicode-width 0.2.2", ] +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "unicode-width" version = "0.2.2" @@ -10034,6 +10549,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "untrusted" version = "0.9.0" @@ -11059,6 +11580,23 @@ dependencies = [ "vortex-scalar", ] +[[package]] +name = "vortex-sqllogictest" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "clap", + "datafusion 52.1.0", + "datafusion-sqllogictest", + "sqllogictest", + "thiserror 2.0.18", + "tokio", + "vortex", + "vortex-datafusion", + "vortex-duckdb", +] + [[package]] name = "vortex-test-e2e" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 1de373ab126..98459ba0024 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,6 +56,7 @@ members = [ "benchmarks/datafusion-bench", "benchmarks/duckdb-bench", 
"benchmarks/random-access-bench", + "vortex-sqllogictest", ] exclude = ["java/testfiles", "wasm-test"] resolver = "2" @@ -133,6 +134,7 @@ datafusion-physical-expr-adapter = { version = "52" } datafusion-physical-expr-common = { version = "52" } datafusion-physical-plan = { version = "52" } datafusion-pruning = { version = "52" } +datafusion-sqllogictest = { version = "52" } dirs = "6.0.0" divan = { package = "codspeed-divan-compat", version = "4.0.4" } enum-iterator = "2.0.0" diff --git a/vortex-duckdb/src/duckdb/connection.rs b/vortex-duckdb/src/duckdb/connection.rs index af712acd09d..4670f79724d 100644 --- a/vortex-duckdb/src/duckdb/connection.rs +++ b/vortex-duckdb/src/duckdb/connection.rs @@ -200,8 +200,14 @@ mod tests { .query("SELECT 1 as int_col, 'text' as str_col") .unwrap(); - assert_eq!(result.column_type(0), cpp::DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - assert_eq!(result.column_type(1), cpp::DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR); + assert_eq!( + result.column_type(0).as_type_id(), + cpp::DUCKDB_TYPE::DUCKDB_TYPE_INTEGER + ); + assert_eq!( + result.column_type(1).as_type_id(), + cpp::DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR + ); } #[test] diff --git a/vortex-duckdb/src/duckdb/logical_type.rs b/vortex-duckdb/src/duckdb/logical_type.rs index 61d393a1a9d..59604fa7b66 100644 --- a/vortex-duckdb/src/duckdb/logical_type.rs +++ b/vortex-duckdb/src/duckdb/logical_type.rs @@ -134,10 +134,6 @@ impl LogicalType { Self::new(DUCKDB_TYPE::DUCKDB_TYPE_BLOB) } - pub fn int64() -> Self { - Self::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT) - } - pub fn uint64() -> Self { Self::new(DUCKDB_TYPE::DUCKDB_TYPE_UBIGINT) } @@ -146,10 +142,22 @@ impl LogicalType { Self::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER) } + pub fn int64() -> Self { + Self::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT) + } + pub fn bool() -> Self { Self::new(DUCKDB_TYPE::DUCKDB_TYPE_BOOLEAN) } + pub fn float32() -> Self { + Self::new(DUCKDB_TYPE::DUCKDB_TYPE_FLOAT) + } + + pub fn float64() -> Self { + 
Self::new(DUCKDB_TYPE::DUCKDB_TYPE_DOUBLE) + } + pub fn as_decimal(&self) -> (u8, u8) { unsafe { ( diff --git a/vortex-duckdb/src/duckdb/query_result.rs b/vortex-duckdb/src/duckdb/query_result.rs index ff285f9ff84..bad1941e10e 100644 --- a/vortex-duckdb/src/duckdb/query_result.rs +++ b/vortex-duckdb/src/duckdb/query_result.rs @@ -7,6 +7,7 @@ use vortex::error::VortexResult; use vortex::error::vortex_bail; use vortex::error::vortex_err; +use crate::LogicalType; use crate::cpp; use crate::duckdb::DataChunk; use crate::wrapper; @@ -67,8 +68,9 @@ impl QueryResult { } /// Get the type of a column by index. - pub fn column_type(&self, col_idx: usize) -> cpp::DUCKDB_TYPE { - unsafe { cpp::duckdb_column_type(self.as_ptr(), col_idx as u64) } + pub fn column_type(&self, col_idx: usize) -> LogicalType { + let dtype = unsafe { cpp::duckdb_column_type(self.as_ptr(), col_idx as u64) }; + LogicalType::new(dtype) } } diff --git a/vortex-duckdb/src/lib.rs b/vortex-duckdb/src/lib.rs index d4ffaf90eb0..9f85fae772c 100644 --- a/vortex-duckdb/src/lib.rs +++ b/vortex-duckdb/src/lib.rs @@ -23,7 +23,7 @@ pub use crate::duckdb::LogicalType; pub use crate::duckdb::Value; use crate::scan::VortexTableFunction; -mod convert; +pub mod convert; pub mod duckdb; pub mod exporter; mod scan; diff --git a/vortex-sqllogictest/Cargo.toml b/vortex-sqllogictest/Cargo.toml new file mode 100644 index 00000000000..bbea16dba29 --- /dev/null +++ b/vortex-sqllogictest/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "vortex-sqllogictest" +authors = { workspace = true } +description = "Test runner for SQL integrations" +edition = { workspace = true } +homepage = { workspace = true } +include = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +publish = false +readme = "README.md" +repository = { workspace = true } +rust-version = { workspace = true } +version = { workspace = true } + +[dependencies] +anyhow = { workspace = true } +async-trait = { workspace = true } +clap = { 
workspace = true, features = ["derive"] }
+datafusion = { workspace = true }
+datafusion-sqllogictest = { workspace = true }
+sqllogictest = "0.28"
+thiserror = { workspace = true }
+tokio = { workspace = true, features = ["full"] }
+vortex = { workspace = true }
+vortex-datafusion = { workspace = true }
+vortex-duckdb = { workspace = true }
+
+[lints]
+workspace = true
+
+[[test]]
+harness = false
+name = "sqllogictests"
+path = "bin/sqllogictests-runner.rs"
diff --git a/vortex-sqllogictest/bin/sqllogictests-runner.rs b/vortex-sqllogictest/bin/sqllogictests-runner.rs
new file mode 100644
index 00000000000..8b129ee9e11
--- /dev/null
+++ b/vortex-sqllogictest/bin/sqllogictests-runner.rs
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Entry point of the sqllogictest runner, built as a `harness = false` test target.
+
+use clap::Parser;
+use vortex_sqllogictest::args::Args;
+
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    let args = Args::parse();
+    // TODO: dispatch the parsed arguments to the selected engines; for now they are only echoed.
+    println!("Args: {args:?}");
+
+    Ok(())
+}
diff --git a/vortex-sqllogictest/src/args.rs b/vortex-sqllogictest/src/args.rs
new file mode 100644
index 00000000000..ee4e29e1106
--- /dev/null
+++ b/vortex-sqllogictest/src/args.rs
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Command-line arguments of the sqllogictest runner.
+
+use clap::Parser;
+
+/// Query engine that a test run can target.
+#[derive(clap::ValueEnum, Clone, Copy, Debug)]
+pub enum Engine {
+    /// Selected on the command line as `datafusion`.
+    #[value(name = "datafusion")]
+    DataFusion,
+    /// Selected on the command line as `duckdb`.
+    #[value(name = "duckdb")]
+    DuckDB,
+}
+
+/// Arguments accepted by the sqllogictest runner binary.
+#[derive(Parser, Debug)]
+pub struct Args {
+    /// Engines to run against, as a comma-separated list (e.g. `--engine datafusion,duckdb`).
+    #[arg(short, long, value_enum, value_delimiter = ',')]
+    pub engine: Option<Vec<Engine>>,
+    /// Optional positional filter (not consumed by the runner yet).
+    #[arg(action)]
+    pub filter: Option<String>,
+}
diff --git a/vortex-sqllogictest/src/duckdb.rs b/vortex-sqllogictest/src/duckdb.rs
new file mode 100644
index 00000000000..9432332a551
--- /dev/null
+++ b/vortex-sqllogictest/src/duckdb.rs
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! DuckDB driver for the sqllogictest runner.
+
+use std::sync::Arc;
+use std::{process::Command, time::Duration};
+
+use async_trait::async_trait;
+use datafusion_sqllogictest::DFColumnType;
+use sqllogictest::{DBOutput, runner::AsyncDB};
+
+use vortex_duckdb::LogicalType;
+use vortex_duckdb::duckdb::Connection;
+use vortex_duckdb::duckdb::Database;
+use vortex_duckdb::duckdb::{Config, DuckDBType};
+
+use crate::error::TestError;
+
+/// DuckDB handles shared behind an `Arc` by [`DuckDB`].
+struct Inner {
+    conn: Connection,
+    // NOTE(review): never read in this file; presumably held only to keep the database open
+    // for as long as `conn` exists -- confirm.
+    db: Database,
+}
+
+// NOTE(review): nothing in this file establishes why `Connection` and `Database` may be moved
+// to or shared between threads. Replace this note with a `SAFETY:` comment stating the
+// invariant that makes these impls sound, or remove the impls.
+unsafe impl Send for Inner {}
+unsafe impl Sync for Inner {}
+
+/// sqllogictest driver that runs statements on a DuckDB connection.
+struct DuckDB {
+    inner: Arc<Inner>,
+}
+
+impl DuckDB {
+    /// Maps a DuckDB logical type onto the column type reported to sqllogictest.
+    ///
+    /// Only the int32, int64, uint64, varchar, bool, float32 and float64 type ids are
+    /// recognised; every other type id is reported as [`DFColumnType::Another`].
+    fn normalize_column_type(dtype: LogicalType) -> DFColumnType {
+        let type_id = dtype.as_type_id();
+        if type_id == LogicalType::int32().as_type_id()
+            || type_id == LogicalType::int64().as_type_id()
+            || type_id == LogicalType::uint64().as_type_id()
+        {
+            DFColumnType::Integer
+        } else if type_id == LogicalType::varchar().as_type_id() {
+            DFColumnType::Text
+        } else if type_id == LogicalType::bool().as_type_id() {
+            DFColumnType::Boolean
+        } else if type_id == LogicalType::float32().as_type_id()
+            || type_id == LogicalType::float64().as_type_id()
+        {
+            DFColumnType::Float
+        } else {
+            DFColumnType::Another
+        }
+    }
+}
+
+#[async_trait]
+impl AsyncDB for DuckDB {
+    type Error = TestError;
+    type ColumnType = DFColumnType;
+
+    /// Runs `sql` on the connection and converts the result for sqllogictest.
+    ///
+    /// A result with no columns and no rows is reported as a completed statement; anything
+    /// else is reported as a row set with one normalized type per column.
+    async fn run(&mut self, sql: &str) -> Result<DBOutput<Self::ColumnType>, Self::Error> {
+        let r = self
+            .inner
+            .conn
+            .query(sql)
+            .map_err(|e| TestError::Other(e.to_string()))?;
+
+        if r.column_count() == 0 && r.row_count() == 0 {
+            return Ok(DBOutput::StatementComplete(0));
+        }
+
+        let types: Vec<DFColumnType> = (0..r.column_count() as usize)
+            .map(|col_idx| Self::normalize_column_type(r.column_type(col_idx)))
+            .collect();
+        // FIXME: row values are not read out of the query result yet, so `rows` is always empty.
+        let rows: Vec<Vec<String>> = Vec::new();
+
+        Ok(DBOutput::Rows { types, rows })
+    }
+
+    /// No-op: this driver performs no explicit teardown.
+    async fn shutdown(&mut self) {}
+
+    fn engine_name(&self) -> &str {
+        "DuckDB"
+    }
+
+    async fn sleep(dur: Duration) {
+        tokio::time::sleep(dur).await
+    }
+
+    /// Runs a system command on behalf of the runner.
+    ///
+    /// Uses `tokio::process::Command` instead of `std::process::Command::output`, which would
+    /// block the current thread while the command runs.
+    async fn run_command(command: Command) -> std::io::Result<std::process::Output> {
+        tokio::process::Command::from(command).output().await
+    }
+}
diff --git a/vortex-sqllogictest/src/error.rs b/vortex-sqllogictest/src/error.rs
new file mode 100644
index 00000000000..9957103e062
--- /dev/null
+++ b/vortex-sqllogictest/src/error.rs
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Error type used by the sqllogictest engine drivers.
+
+use datafusion_sqllogictest::DFSqlLogicTestError;
+
+/// Error reported by a sqllogictest engine driver.
+#[derive(Debug, thiserror::Error)]
+pub enum TestError {
+    /// Failure carried as a plain message.
+    #[error("Other: {0}")]
+    Other(String),
+}
+
+impl From<DFSqlLogicTestError> for TestError {
+    /// Keeps the source error's display text; none of its variants has a dedicated
+    /// [`TestError`] counterpart yet.
+    fn from(value: DFSqlLogicTestError) -> Self {
+        TestError::Other(value.to_string())
+    }
+}
diff --git a/vortex-sqllogictest/src/lib.rs b/vortex-sqllogictest/src/lib.rs
new file mode 100644
index 00000000000..e6adcb5ef0b
--- /dev/null
+++ b/vortex-sqllogictest/src/lib.rs
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+pub mod args;
+pub mod duckdb;
+pub mod error;