diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d9122acb1a0..886d6cd5204 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -221,7 +221,7 @@ jobs: target: wasm32-unknown-unknown env: rustflags: "RUSTFLAGS='-A warnings --cfg getrandom_backend=\"wasm_js\"'" - args: "--target wasm32-unknown-unknown --exclude vortex --exclude vortex-cuda --exclude vortex-cub --exclude vortex-nvcomp --exclude vortex-datafusion --exclude vortex-duckdb --exclude vortex-tui --exclude vortex-zstd --exclude vortex-test-e2e-cuda" + args: "--target wasm32-unknown-unknown --exclude vortex --exclude vortex-cuda --exclude vortex-cub --exclude vortex-nvcomp --exclude vortex-datafusion --exclude vortex-duckdb --exclude vortex-tui --exclude vortex-zstd --exclude vortex-test-e2e-cuda --exclude vortex-sqllogictest" steps: - uses: runs-on/action@v2 with: @@ -615,10 +615,12 @@ jobs: --exclude vortex-fuzz --exclude vortex-cuda --exclude vortex-nvcomp ` --exclude vortex-cub --exclude vortex-test-e2e-cuda --exclude duckdb-bench ` --exclude lance-bench --exclude datafusion-bench --exclude random-access-bench ` - --exclude compress-bench --exclude xtask --exclude vortex-datafusion + --exclude compress-bench --exclude xtask --exclude vortex-datafusion ` + --exclude vortex-sqllogictest - name: Rust Tests (Other) if: matrix.os != 'windows-x64' - run: cargo nextest run --locked --workspace --all-features --no-fail-fast --exclude vortex-bench --exclude xtask + run: | + cargo nextest run --locked --workspace --all-features --no-fail-fast --exclude vortex-bench --exclude xtask --exclude vortex-sqllogictest build-java: name: "Java" @@ -735,6 +737,29 @@ jobs: cmake --build vortex-cxx/examples/build --parallel $(nproc) vortex-cxx/examples/build/hello-vortex vortex-cxx/examples/goldenfiles/example.vortex + sqllogic-test: + name: "SQL logic tests" + runs-on: + - runs-on=${{ github.run_id }} + - family=m7i+m7i-flex+m7a + - cpu=8 + - image=ubuntu24-full-x64 + - extras=s3-cache + 
- tag=sql-logic-test + steps: + - uses: runs-on/action@v2 + with: + sccache: s3 + - uses: actions/checkout@v6 + - id: setup-rust + uses: ./.github/actions/setup-rust + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + - name: Run sqllogictest tests + run: | + cargo test -p vortex-sqllogictest --test sqllogictests + + wasm-integration: name: "wasm-integration" runs-on: ubuntu-latest diff --git a/Cargo.lock b/Cargo.lock index fc193157f44..fdcf451ed69 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -145,6 +145,35 @@ version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" +[[package]] +name = "apache-avro" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf" +dependencies = [ + "bigdecimal", + "bon", + "bzip2", + "crc32fast", + "digest", + "liblzma", + "log", + "miniz_oxide", + "num-bigint", + "quad-rand", + "rand 0.9.2", + "regex-lite", + "serde", + "serde_bytes", + "serde_json", + "snap", + "strum 0.27.2", + "strum_macros 0.27.2", + "thiserror 2.0.18", + "uuid", + "zstd", +] + [[package]] name = "approx" version = "0.5.1" @@ -154,6 +183,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ar_archive_writer" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" +dependencies = [ + "object", +] + [[package]] name = "arbitrary" version = "1.4.2" @@ -1064,6 +1102,7 @@ dependencies = [ "num-bigint", "num-integer", "num-traits", + "serde", ] [[package]] @@ -1311,7 +1350,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c6d47a4e2961fb8721bcfc54feae6455f2f64e7054f9bc67e875f0e77f4c58d" dependencies = [ "rust_decimal", - "schemars", + "schemars 1.2.1", "serde", "utf8-width", ] @@ -1599,7 +1638,7 @@ checksum = 
"af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681" dependencies = [ "serde", "termcolor", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -1723,7 +1762,7 @@ checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" dependencies = [ "strum 0.26.3", "strum_macros 0.26.4", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -1769,9 +1808,13 @@ version = "0.4.36" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a" dependencies = [ + "bzip2", "compression-core", "flate2", + "liblzma", "memchr", + "zstd", + "zstd-safe", ] [[package]] @@ -1816,7 +1859,7 @@ dependencies = [ "encode_unicode", "libc", "once_cell", - "unicode-width", + "unicode-width 0.2.2", "windows-sys 0.61.2", ] @@ -2280,6 +2323,7 @@ dependencies = [ "arrow-schema", "async-trait", "bytes", + "bzip2", "chrono", "datafusion-catalog 52.1.0", "datafusion-catalog-listing 52.1.0", @@ -2287,6 +2331,7 @@ dependencies = [ "datafusion-common-runtime 52.1.0", "datafusion-datasource 52.1.0", "datafusion-datasource-arrow 52.1.0", + "datafusion-datasource-avro", "datafusion-datasource-csv 52.1.0", "datafusion-datasource-json 52.1.0", "datafusion-datasource-parquet", @@ -2306,8 +2351,10 @@ dependencies = [ "datafusion-physical-plan 52.1.0", "datafusion-session 52.1.0", "datafusion-sql 52.1.0", + "flate2", "futures", "itertools 0.14.0", + "liblzma", "log", "object_store", "parking_lot", @@ -2319,6 +2366,7 @@ dependencies = [ "tokio", "url", "uuid", + "zstd", ] [[package]] @@ -2474,6 +2522,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3237a6ff0d2149af4631290074289cae548c9863c885d821315d54c6673a074a" dependencies = [ "ahash 0.8.12", + "apache-avro", "arrow", "arrow-ipc", "chrono", @@ -2485,6 +2534,7 @@ dependencies = [ "object_store", "parquet", "paste", + "recursive", "sqlparser", "tokio", "web-time", @@ -2548,8 +2598,10 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "1b2a6be734cc3785e18bbf2a7f2b22537f6b9fb960d79617775a51568c281842" dependencies = [ "arrow", + "async-compression", "async-trait", "bytes", + "bzip2", "chrono", "datafusion-common 52.1.0", "datafusion-common-runtime 52.1.0", @@ -2560,14 +2612,18 @@ dependencies = [ "datafusion-physical-expr-common 52.1.0", "datafusion-physical-plan 52.1.0", "datafusion-session 52.1.0", + "flate2", "futures", "glob", "itertools 0.14.0", + "liblzma", "log", "object_store", "rand 0.9.2", "tokio", + "tokio-util", "url", + "zstd", ] [[package]] @@ -2618,6 +2674,26 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-datasource-avro" +version = "52.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "828088c2fb681cc0e06fb42f541f76c82a0c10278f9fd6334e22c8d1e3574ee7" +dependencies = [ + "apache-avro", + "arrow", + "async-trait", + "bytes", + "datafusion-common 52.1.0", + "datafusion-datasource 52.1.0", + "datafusion-physical-expr-common 52.1.0", + "datafusion-physical-plan 52.1.0", + "datafusion-session 52.1.0", + "futures", + "num-traits", + "object_store", +] + [[package]] name = "datafusion-datasource-csv" version = "51.0.0" @@ -2831,6 +2907,7 @@ dependencies = [ "indexmap", "itertools 0.14.0", "paste", + "recursive", "serde_json", "sqlparser", ] @@ -2900,6 +2977,8 @@ dependencies = [ "arrow", "arrow-buffer", "base64", + "blake2", + "blake3", "chrono", "chrono-tz", "datafusion-common 52.1.0", @@ -2911,9 +2990,11 @@ dependencies = [ "hex", "itertools 0.14.0", "log", + "md-5", "num-traits", "rand 0.9.2", "regex", + "sha2", "unicode-segmentation", "uuid", ] @@ -3176,6 +3257,7 @@ dependencies = [ "indexmap", "itertools 0.14.0", "log", + "recursive", "regex", "regex-syntax", ] @@ -3222,6 +3304,7 @@ dependencies = [ "parking_lot", "paste", "petgraph", + "recursive", "tokio", ] @@ -3320,6 +3403,7 @@ dependencies = [ "datafusion-physical-plan 52.1.0", "datafusion-pruning 52.1.0", 
"itertools 0.14.0", + "recursive", ] [[package]] @@ -3446,6 +3530,29 @@ dependencies = [ "parking_lot", ] +[[package]] +name = "datafusion-spark" +version = "52.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "556c431f5f2259620c8223254c0ef57aa9a85c576d4da0166157260f71eb0e25" +dependencies = [ + "arrow", + "bigdecimal", + "chrono", + "crc32fast", + "datafusion-catalog 52.1.0", + "datafusion-common 52.1.0", + "datafusion-execution 52.1.0", + "datafusion-expr 52.1.0", + "datafusion-functions 52.1.0", + "datafusion-functions-nested 52.1.0", + "log", + "percent-encoding", + "rand 0.9.2", + "sha1", + "url", +] + [[package]] name = "datafusion-sql" version = "51.0.0" @@ -3476,10 +3583,58 @@ dependencies = [ "datafusion-expr 52.1.0", "indexmap", "log", + "recursive", "regex", "sqlparser", ] +[[package]] +name = "datafusion-sqllogictest" +version = "52.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d388fec80647198ae041d314dd7d9e2305207836ecec3ad48908eac6844cdef" +dependencies = [ + "arrow", + "async-trait", + "bigdecimal", + "clap", + "datafusion 52.1.0", + "datafusion-spark", + "datafusion-substrait", + "futures", + "half", + "indicatif", + "itertools 0.14.0", + "log", + "object_store", + "sqllogictest", + "sqlparser", + "tempfile", + "thiserror 2.0.18", + "tokio", +] + +[[package]] +name = "datafusion-substrait" +version = "52.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6042adacd0bd64e56c22f6a7f9ce0ce1793dd367c899d868179d029f110d9215" +dependencies = [ + "async-recursion", + "async-trait", + "chrono", + "datafusion 52.1.0", + "half", + "itertools 0.14.0", + "object_store", + "pbjson-types", + "prost 0.14.3", + "substrait", + "tokio", + "url", + "uuid", +] + [[package]] name = "deepsize" version = "0.2.0" @@ -3691,6 +3846,18 @@ dependencies = [ "num-traits", ] +[[package]] +name = "educe" +version = "0.6.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7bc049e1bd8cdeb31b68bbd586a9464ecf9f3944af3958a7a9d0f8b9799417" +dependencies = [ + "enum-ordinalize", + "proc-macro2", + "quote", + "syn 2.0.114", +] + [[package]] name = "either" version = "1.15.0" @@ -3732,6 +3899,26 @@ dependencies = [ "syn 2.0.114", ] +[[package]] +name = "enum-ordinalize" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1091a7bb1f8f2c4b28f1fe2cef4980ca2d410a3d727d67ecc3178c9b0800f0" +dependencies = [ + "enum-ordinalize-derive", +] + +[[package]] +name = "enum-ordinalize-derive" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + [[package]] name = "env_filter" version = "0.1.4" @@ -3777,6 +3964,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "escape8259" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5692dd7b5a1978a5aeb0ce83b7655c58ca8efdcb79d21036ea249da95afec2c6" + [[package]] name = "ethnum" version = "1.5.2" @@ -4011,6 +4204,15 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs-err" +version = "3.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf68cef89750956493a66a10f512b9e58d9db21f2a573c079c0bdf1207a54a7" +dependencies = [ + "autocfg", +] + [[package]] name = "fs4" version = "0.8.4" @@ -4912,7 +5114,7 @@ dependencies = [ "console 0.16.2", "futures-core", "portable-atomic", - "unicode-width", + "unicode-width 0.2.2", "unit-prefix", "web-time", ] @@ -5894,6 +6096,18 @@ dependencies = [ "redox_syscall 0.7.0", ] +[[package]] +name = "libtest-mimic" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33" +dependencies 
= [ + "anstream", + "anstyle", + "clap", + "escape8259", +] + [[package]] name = "line-clipping" version = "0.3.5" @@ -6440,6 +6654,7 @@ checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", + "serde", ] [[package]] @@ -6574,6 +6789,15 @@ dependencies = [ "objc2-core-foundation", ] +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + [[package]] name = "object_store" version = "0.12.5" @@ -6865,6 +7089,12 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "owo-colors" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c6901729fa79e91a0913333229e9ca5dc725089d1c363b2f4b4760709dc4a52" + [[package]] name = "papergrid" version = "0.17.0" @@ -6873,7 +7103,7 @@ checksum = "6978128c8b51d8f4080631ceb2302ab51e32cc6e8615f735ee2f83fd269ae3f1" dependencies = [ "bytecount", "fnv", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -6988,6 +7218,43 @@ dependencies = [ "stfu8", ] +[[package]] +name = "pbjson" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "898bac3fa00d0ba57a4e8289837e965baa2dee8c3749f3b11d45a64b4223d9c3" +dependencies = [ + "base64", + "serde", +] + +[[package]] +name = "pbjson-build" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af22d08a625a2213a78dbb0ffa253318c5c79ce3133d32d296655a7bdfb02095" +dependencies = [ + "heck", + "itertools 0.14.0", + "prost 0.14.3", + "prost-types", +] + +[[package]] +name = "pbjson-types" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e748e28374f10a330ee3bb9f29b828c0ac79831a32bab65015ad9b661ead526" +dependencies = [ + "bytes", + "chrono", + "pbjson", + "pbjson-build", + 
"prost 0.14.3", + "prost-build", + "serde", +] + [[package]] name = "pbkdf2" version = "0.12.2" @@ -7492,6 +7759,16 @@ dependencies = [ "prost 0.14.3", ] +[[package]] +name = "psm" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa96cb91275ed31d6da3e983447320c4eb219ac180fa1679a0889ff32861e2d" +dependencies = [ + "ar_archive_writer", + "cc", +] + [[package]] name = "ptr_meta" version = "0.1.4" @@ -7632,6 +7909,12 @@ dependencies = [ "url", ] +[[package]] +name = "quad-rand" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" + [[package]] name = "quick-xml" version = "0.37.5" @@ -7891,7 +8174,7 @@ dependencies = [ "thiserror 2.0.18", "unicode-segmentation", "unicode-truncate", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -7942,7 +8225,7 @@ dependencies = [ "strum 0.27.2", "time", "unicode-segmentation", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -7971,6 +8254,26 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.114", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -8055,6 +8358,16 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "regress" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "2057b2325e68a893284d1538021ab90279adac1139957ca2a74426c6f118fb48" +dependencies = [ + "hashbrown 0.16.1", + "memchr", +] + [[package]] name = "relative-path" version = "1.9.3" @@ -8479,6 +8792,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "schemars" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +dependencies = [ + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + [[package]] name = "schemars" version = "1.2.1" @@ -8491,6 +8816,18 @@ dependencies = [ "serde_json", ] +[[package]] +name = "schemars_derive" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.114", +] + [[package]] name = "scoped-tls" version = "1.0.1" @@ -8567,6 +8904,10 @@ name = "semver" version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +dependencies = [ + "serde", + "serde_core", +] [[package]] name = "seq-macro" @@ -8584,6 +8925,16 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_bytes" +version = "0.11.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" +dependencies = [ + "serde", + "serde_core", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -8604,6 +8955,17 @@ dependencies = [ "syn 2.0.114", ] +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + [[package]] name = "serde_json" 
version = "1.0.149" @@ -8646,6 +9008,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_tokenstream" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64060d864397305347a78851c51588fd283767e7e7589829e8121d65512340f1" +dependencies = [ + "proc-macro2", + "quote", + "serde", + "syn 2.0.114", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -8658,6 +9032,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "sha1" version = "0.10.6" @@ -8917,6 +9304,31 @@ dependencies = [ "der", ] +[[package]] +name = "sqllogictest" +version = "0.28.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3566426f72a13e393aa34ca3d542c5b0eb86da4c0db137ee9b5cfccc6179e52d" +dependencies = [ + "async-trait", + "educe", + "fs-err", + "futures", + "glob", + "humantime", + "itertools 0.13.0", + "libtest-mimic", + "md-5", + "owo-colors", + "rand 0.8.5", + "regex", + "similar", + "subst", + "tempfile", + "thiserror 2.0.18", + "tracing", +] + [[package]] name = "sqlparser" version = "0.59.0" @@ -8924,6 +9336,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", + "recursive", "sqlparser_derive", ] @@ -8944,6 +9357,19 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "stacker" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +dependencies = [ + 
"cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -9021,6 +9447,41 @@ dependencies = [ "syn 2.0.114", ] +[[package]] +name = "subst" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a9a86e5144f63c2d18334698269a8bfae6eece345c70b64821ea5b35054ec99" +dependencies = [ + "memchr", + "unicode-width 0.1.14", +] + +[[package]] +name = "substrait" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62fc4b483a129b9772ccb9c3f7945a472112fdd9140da87f8a4e7f1d44e045d0" +dependencies = [ + "heck", + "pbjson", + "pbjson-build", + "pbjson-types", + "prettyplease", + "prost 0.14.3", + "prost-build", + "prost-types", + "regress", + "schemars 0.8.22", + "semver", + "serde", + "serde_json", + "serde_yaml", + "syn 2.0.114", + "typify", + "walkdir", +] + [[package]] name = "subtle" version = "2.6.1" @@ -9455,7 +9916,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f8daae29995a24f65619e19d8d31dea5b389f3d853d8bf297bbf607cd0014cc" dependencies = [ - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -9934,6 +10395,53 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "typify" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5bcc6f62eb1fa8aa4098f39b29f93dcb914e17158b76c50360911257aa629" +dependencies = [ + "typify-impl", + "typify-macro", +] + +[[package]] +name = "typify-impl" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1eb359f7ffa4f9ebe947fa11a1b2da054564502968db5f317b7e37693cb2240" +dependencies = [ + "heck", + "log", + "proc-macro2", + "quote", + "regress", + "schemars 0.8.22", + "semver", + "serde", + "serde_json", + "syn 
2.0.114", + "thiserror 2.0.18", + "unicode-ident", +] + +[[package]] +name = "typify-macro" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "911c32f3c8514b048c1b228361bebb5e6d73aeec01696e8cc0e82e2ffef8ab7a" +dependencies = [ + "proc-macro2", + "quote", + "schemars 0.8.22", + "semver", + "serde", + "serde_json", + "serde_tokenstream", + "syn 2.0.114", + "typify-impl", +] + [[package]] name = "ucd-trie" version = "0.1.7" @@ -9979,9 +10487,15 @@ checksum = "16b380a1238663e5f8a691f9039c73e1cdae598a30e9855f541d29b08b53e9a5" dependencies = [ "itertools 0.14.0", "unicode-segmentation", - "unicode-width", + "unicode-width 0.2.2", ] +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "unicode-width" version = "0.2.2" @@ -10006,6 +10520,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "untrusted" version = "0.9.0" @@ -11043,6 +11563,26 @@ dependencies = [ "vortex-session", ] +[[package]] +name = "vortex-sqllogictest" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "clap", + "datafusion 52.1.0", + "datafusion-sqllogictest", + "futures", + "indicatif", + "sqllogictest", + "tempfile", + "thiserror 2.0.18", + "tokio", + "vortex", + "vortex-datafusion", + "vortex-duckdb", +] + [[package]] name = "vortex-test-e2e" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 732c61aea52..f6b4e73fbe8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,6 +56,7 @@ members = [ "benchmarks/datafusion-bench", 
"benchmarks/duckdb-bench", "benchmarks/random-access-bench", + "vortex-sqllogictest", ] exclude = ["java/testfiles", "wasm-test"] resolver = "2" @@ -134,6 +135,7 @@ datafusion-physical-expr-adapter = { version = "52" } datafusion-physical-expr-common = { version = "52" } datafusion-physical-plan = { version = "52" } datafusion-pruning = { version = "52" } +datafusion-sqllogictest = { version = "52" } dirs = "6.0.0" divan = { package = "codspeed-divan-compat", version = "4.0.4" } enum-iterator = "2.0.0" diff --git a/vortex-duckdb/cpp/include/duckdb_vx/vector.h b/vortex-duckdb/cpp/include/duckdb_vx/vector.h index 601ab43c641..919c1d1b60e 100644 --- a/vortex-duckdb/cpp/include/duckdb_vx/vector.h +++ b/vortex-duckdb/cpp/include/duckdb_vx/vector.h @@ -35,9 +35,9 @@ void duckdb_vx_set_dictionary_vector_length(duckdb_vector dict, unsigned int len // Add the buffer to the string vector (basically, keep it alive as long as the vector). void duckdb_vx_string_vector_add_vector_data_buffer(duckdb_vector ffi_vector, duckdb_vx_vector_buffer buffer); - -// Add the buffer to the data vector (basically, keep it alive as long as the vector) and set the data pointer. -// You must ensure that the ptr is valid for the lifetime of the vector and the ptr addr + size is valid. +// Add the buffer to the data vector (basically, keep it alive as long as the vector) and set the data +// pointer. You must ensure that the ptr is valid for the lifetime of the vector and the ptr addr + size is +// valid. void duckdb_vx_vector_set_vector_data_buffer(duckdb_vector ffi_vector, duckdb_vx_vector_buffer buffer); // Set the data pointer for the vector. This is the start of the values array in the vector. 
@@ -50,6 +50,8 @@ void duckdb_vector_flatten(duckdb_vector vector, unsigned long len); const char *duckdb_vector_to_string(duckdb_vector vector, unsigned long len, duckdb_vx_error *err); +duckdb_value duckdb_vx_vector_get_value(duckdb_vector ffi_vector, idx_t index); + #ifdef __cplusplus /* End C ABI */ } #endif diff --git a/vortex-duckdb/cpp/vector.cpp b/vortex-duckdb/cpp/vector.cpp index d2b44a075f8..15af222c31c 100644 --- a/vortex-duckdb/cpp/vector.cpp +++ b/vortex-duckdb/cpp/vector.cpp @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors #include "duckdb/common/vector.hpp" +#include "duckdb/common/types/value.hpp" #include "duckdb/common/types/vector.hpp" #include "duckdb_vx.h" @@ -79,6 +80,13 @@ extern "C" void duckdb_vx_vector_set_data_ptr(duckdb_vector ffi_vector, void *pt dvector->SetDataPtr((data_ptr_t)ptr); } +extern "C" duckdb_value duckdb_vx_vector_get_value(duckdb_vector ffi_vector, idx_t index) { + auto vector = reinterpret_cast(ffi_vector); + + auto value = new duckdb::Value(vector->GetValue(index)); + return reinterpret_cast(value); +} + void duckdb_vector_flatten(duckdb_vector vector, unsigned long len) { auto dvector = reinterpret_cast(vector); dvector->Flatten(len); diff --git a/vortex-duckdb/src/duckdb/connection.rs b/vortex-duckdb/src/duckdb/connection.rs index af712acd09d..4670f79724d 100644 --- a/vortex-duckdb/src/duckdb/connection.rs +++ b/vortex-duckdb/src/duckdb/connection.rs @@ -200,8 +200,14 @@ mod tests { .query("SELECT 1 as int_col, 'text' as str_col") .unwrap(); - assert_eq!(result.column_type(0), cpp::DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - assert_eq!(result.column_type(1), cpp::DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR); + assert_eq!( + result.column_type(0).as_type_id(), + cpp::DUCKDB_TYPE::DUCKDB_TYPE_INTEGER + ); + assert_eq!( + result.column_type(1).as_type_id(), + cpp::DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR + ); } #[test] diff --git a/vortex-duckdb/src/duckdb/logical_type.rs b/vortex-duckdb/src/duckdb/logical_type.rs 
index 7a233729a00..890d0bb8b8c 100644 --- a/vortex-duckdb/src/duckdb/logical_type.rs +++ b/vortex-duckdb/src/duckdb/logical_type.rs @@ -131,10 +131,6 @@ impl LogicalType { Self::new(DUCKDB_TYPE::DUCKDB_TYPE_BLOB) } - pub fn int64() -> Self { - Self::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT) - } - pub fn uint64() -> Self { Self::new(DUCKDB_TYPE::DUCKDB_TYPE_UBIGINT) } @@ -143,10 +139,22 @@ impl LogicalType { Self::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER) } + pub fn int64() -> Self { + Self::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT) + } + pub fn bool() -> Self { Self::new(DUCKDB_TYPE::DUCKDB_TYPE_BOOLEAN) } + pub fn float32() -> Self { + Self::new(DUCKDB_TYPE::DUCKDB_TYPE_FLOAT) + } + + pub fn float64() -> Self { + Self::new(DUCKDB_TYPE::DUCKDB_TYPE_DOUBLE) + } + pub fn as_decimal(&self) -> (u8, u8) { unsafe { ( diff --git a/vortex-duckdb/src/duckdb/query_result.rs b/vortex-duckdb/src/duckdb/query_result.rs index ff285f9ff84..bad1941e10e 100644 --- a/vortex-duckdb/src/duckdb/query_result.rs +++ b/vortex-duckdb/src/duckdb/query_result.rs @@ -7,6 +7,7 @@ use vortex::error::VortexResult; use vortex::error::vortex_bail; use vortex::error::vortex_err; +use crate::LogicalType; use crate::cpp; use crate::duckdb::DataChunk; use crate::wrapper; @@ -67,8 +68,9 @@ impl QueryResult { } /// Get the type of a column by index. 
- pub fn column_type(&self, col_idx: usize) -> cpp::DUCKDB_TYPE { - unsafe { cpp::duckdb_column_type(self.as_ptr(), col_idx as u64) } + pub fn column_type(&self, col_idx: usize) -> LogicalType { + let dtype = unsafe { cpp::duckdb_column_type(self.as_ptr(), col_idx as u64) }; + LogicalType::new(dtype) } } diff --git a/vortex-duckdb/src/duckdb/vector.rs b/vortex-duckdb/src/duckdb/vector.rs index 0b8bbb309e0..6a1b88bb5e8 100644 --- a/vortex-duckdb/src/duckdb/vector.rs +++ b/vortex-duckdb/src/duckdb/vector.rs @@ -295,6 +295,17 @@ impl Vector { )) } } + + pub fn get_value(&self, idx: u64, len: u64) -> Option { + if idx >= len { + return None; + } + + unsafe { + let value_ptr = cpp::duckdb_vx_vector_get_value(self.as_ptr(), idx as idx_t); + Some(Value::own(value_ptr)) + } + } } pub struct ValidityRef<'a> { diff --git a/vortex-duckdb/src/lib.rs b/vortex-duckdb/src/lib.rs index d4ffaf90eb0..9f85fae772c 100644 --- a/vortex-duckdb/src/lib.rs +++ b/vortex-duckdb/src/lib.rs @@ -23,7 +23,7 @@ pub use crate::duckdb::LogicalType; pub use crate::duckdb::Value; use crate::scan::VortexTableFunction; -mod convert; +pub mod convert; pub mod duckdb; pub mod exporter; mod scan; diff --git a/vortex-sqllogictest/.gitignore b/vortex-sqllogictest/.gitignore new file mode 100644 index 00000000000..4620c5f726e --- /dev/null +++ b/vortex-sqllogictest/.gitignore @@ -0,0 +1,3 @@ +scratch/ +*.vortex +*.parquet \ No newline at end of file diff --git a/vortex-sqllogictest/Cargo.toml b/vortex-sqllogictest/Cargo.toml new file mode 100644 index 00000000000..ceb7946fa87 --- /dev/null +++ b/vortex-sqllogictest/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "vortex-sqllogictest" +authors = { workspace = true } +description = "Test runner for SQL integrations" +edition = { workspace = true } +homepage = { workspace = true } +include = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +publish = false +readme = "README.md" +repository = { workspace = true } +rust-version 
= { workspace = true } +version = { workspace = true } + +[dependencies] +anyhow = { workspace = true } +async-trait = { workspace = true } +clap = { workspace = true, features = ["derive"] } +datafusion = { workspace = true } +datafusion-sqllogictest = { workspace = true } +futures.workspace = true +indicatif.workspace = true +sqllogictest = "0.28" +tempfile = { workspace = true } +thiserror = { workspace = true } +tokio = { workspace = true, features = ["full"] } +vortex = { workspace = true, features = ["tokio"] } +vortex-datafusion = { workspace = true } +vortex-duckdb = { workspace = true } + +[lints] +workspace = true + +[[test]] +harness = false +name = "sqllogictests" +path = "bin/sqllogictests-runner.rs" diff --git a/vortex-sqllogictest/README.md b/vortex-sqllogictest/README.md new file mode 100644 index 00000000000..9c0b74c8c91 --- /dev/null +++ b/vortex-sqllogictest/README.md @@ -0,0 +1,54 @@ +# vortex-sqllogictest + +This crate uses `sqllogictest-rs` to run `slt`-based tests on both DataFusion and DuckDB, both preconfigured to work with Vortex. + +Different test files might run in parallel, but each file is run against one query engine at a time, finishing with one engine before the next one starts. + +## Running tests + +In order to run the tests, the current command is: + +```shell +cargo test -p vortex-sqllogictest --test sqllogictests +``` + +Note that `nextest` isn't currently supported, but might be in the future. + +## Writing a new test + +Currently, tests must account for the differences between the engines. The general pattern that works for basic things is using views over files, as DuckDB and DataFusion don't seem to have a shared syntax to create a table backed by an external storage format. + +`$__TEST_DIR__` is a special variable that points to a tempdir. It's only available if substitution is enabled with `control substitution on`. 
+ +Here is a simple test that can be reused: + +```text +query I +COPY (values (1, 2), (3, 4)) TO '$__TEST_DIR__/test.vortex'; +---- +2 + +statement ok +CREATE VIEW foo AS SELECT * FROM '$__TEST_DIR__/test.vortex'; + +query II +SELECT * FROM foo; +---- +1 2 +3 4 + +statement ok +DROP VIEW IF EXISTS foo; +``` + +## SLT Syntax + +We generally use the default `slt` syntax as described in the [SQLite wiki](https://sqlite.org/sqllogictest/doc/trunk/about.wiki). The one difference is that we use the same column types as `datafusion-sqllogictest`'s, so when specifying expected query result column types, we support the following identifiers: + +- 'B' for boolean +- 'D' for datetime +- 'I' for integer +- 'P' for timestamp +- 'R' for float +- 'T' for text +- '?' for anything else diff --git a/vortex-sqllogictest/bin/sqllogictests-runner.rs b/vortex-sqllogictest/bin/sqllogictests-runner.rs new file mode 100644 index 00000000000..6efb9df3f88 --- /dev/null +++ b/vortex-sqllogictest/bin/sqllogictests-runner.rs @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::sync::Arc; + +use clap::Parser; +use datafusion::common::GetExt; +use datafusion::datasource::provider::DefaultTableFactory; +use datafusion::execution::SessionStateBuilder; +use datafusion::prelude::SessionContext; +use datafusion_sqllogictest::DataFusion; +use datafusion_sqllogictest::df_value_validator; +use datafusion_sqllogictest::value_normalizer; +use futures::StreamExt; +use futures::TryStreamExt; +use indicatif::MultiProgress; +use indicatif::ProgressBar; +use indicatif::ProgressDrawTarget; +use indicatif::ProgressStyle; +use sqllogictest::Record; +use sqllogictest::Runner; +use sqllogictest::parse_file; +use sqllogictest::strict_column_validator; +use vortex::error::VortexExpect; +use vortex_datafusion::VortexFormatFactory; +use vortex_sqllogictest::args::Args; +use vortex_sqllogictest::duckdb::DuckDB; +use 
vortex_sqllogictest::duckdb::DuckDBTestError; +use vortex_sqllogictest::utils::list_files; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let args = Args::parse(); + + if args.list { + eprintln!("Ignoring `--list` which is unsupported by `sqlogictests-runner`"); + + return Ok(()); + } + + if args.filter.is_some() { + eprintln!("Ignoring test filter for sqllogictest"); + } + + let mpb = MultiProgress::with_draw_target(ProgressDrawTarget::stderr_with_hz(1)); + + let crate_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")); + let path = crate_path.join("slt/"); + + let all_errors = futures::stream::iter(list_files(path)?) + .map(|path| { + let mpb = mpb.clone(); + + async move { + let mut errors = vec![]; + let factory = Arc::new(VortexFormatFactory::new()); + let session_state_builder = SessionStateBuilder::new() + .with_default_features() + .with_table_factory( + factory.get_ext().to_uppercase(), + Arc::new(DefaultTableFactory::new()), + ) + .with_file_formats(vec![factory]); + + let session = SessionContext::new_with_state(session_state_builder.build()) + .enable_url_table(); + + let filename = path + .file_name() + .vortex_expect("must be file") + .to_string_lossy(); + let records = parse_file(path.canonicalize()?)?; + + let df_pb = mpb.add(ProgressBar::new(records.len() as u64)); + df_pb.set_message(format!("DF {filename}")); + df_pb.set_style(ProgressStyle::default_spinner()); + + let mut df_runner = Runner::new(|| async { + Ok(DataFusion::new( + session.clone(), + path.clone(), + df_pb.clone(), + )) + }); + + df_runner.add_label("datafusion"); + df_runner.with_column_validator(strict_column_validator); + df_runner.with_normalizer(value_normalizer); + df_runner.with_validator(df_value_validator); + + for record in records.iter() { + if let Record::Halt { .. 
} = record { + break; + } + + if let Err(e) = df_runner.run_async(record.clone()).await { + errors.push(format!("DF Failure: {e}")); + } + } + + df_pb.finish_and_clear(); + + let duckdb_pb = mpb.add(ProgressBar::new(records.len() as u64)); + duckdb_pb.set_message(format!("DuckDB {filename}")); + + let mut duckdb_runner = Runner::new(|| async { + DuckDB::try_new(duckdb_pb.clone()) + .map_err(|e| DuckDBTestError::Other(e.to_string())) + }); + + duckdb_runner.add_label("duckdb"); + duckdb_runner.with_column_validator(strict_column_validator); + duckdb_runner.with_normalizer(value_normalizer); + + for record in records.iter() { + if let Record::Halt { .. } = record { + break; + } + + if let Err(e) = duckdb_runner.run_async(record.clone()).await { + errors.push(format!("DuckDB Failure: {e}")); + } + } + + duckdb_pb.finish_and_clear(); + + anyhow::Ok(errors) + } + }) + .buffer_unordered(args.test_threads) + .try_collect::>() + .await?; + + for err in all_errors.into_iter().flatten() { + eprintln!("Failure: {err}"); + } + + Ok(()) +} diff --git a/vortex-sqllogictest/build.rs b/vortex-sqllogictest/build.rs new file mode 100644 index 00000000000..a62b3e39458 --- /dev/null +++ b/vortex-sqllogictest/build.rs @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +#![allow(clippy::unwrap_used)] + +fn main() { + // Propagate DuckDB rpath from vortex-duckdb + let duckdb_lib = std::env::var("DEP_DUCKDB_LIB_DIR").unwrap(); + println!("cargo:rustc-link-arg=-Wl,-rpath,{duckdb_lib}"); +} diff --git a/vortex-sqllogictest/slt/create.slt b/vortex-sqllogictest/slt/create.slt new file mode 100644 index 00000000000..7f68683207e --- /dev/null +++ b/vortex-sqllogictest/slt/create.slt @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright the Vortex contributors + +include ./setup.slt + +query I +COPY (values (1, 2), (3, 4)) TO '$__TEST_DIR__/create.vortex'; +---- +2 + +statement ok +CREATE 
VIEW foo AS SELECT * FROM '$__TEST_DIR__/create.vortex'; + +query II +SELECT * FROM foo; +---- +1 2 +3 4 + +statement ok +DROP VIEW IF EXISTS foo; \ No newline at end of file diff --git a/vortex-sqllogictest/slt/setup.slt b/vortex-sqllogictest/slt/setup.slt new file mode 100644 index 00000000000..b8fd819150b --- /dev/null +++ b/vortex-sqllogictest/slt/setup.slt @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright the Vortex contributors + +# This is a common setup script to enable shared functionality + +# Enables default substitution, using tempdir anywhere $__TEST_DIR__ is used +control substitution on \ No newline at end of file diff --git a/vortex-sqllogictest/src/args.rs b/vortex-sqllogictest/src/args.rs new file mode 100644 index 00000000000..ff08ad15e25 --- /dev/null +++ b/vortex-sqllogictest/src/args.rs @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use clap::Parser; + +#[derive(clap::ValueEnum, Clone, Copy, Debug)] +pub enum Engine { + #[clap(name = "datafusion")] + DataFusion, + #[clap(name = "duckdb")] + DuckDB, +} + +/// Binary args, including all flags that `cargo test` might pass. 
+#[derive(Parser, Debug)] +pub struct Args { + #[arg(short, long, value_enum, value_delimiter = ',')] + pub engine: Option>, + #[arg(action)] + pub filter: Option, + + #[clap( + long, + help = "IGNORED (for compatibility with built in rust test runner)" + )] + pub format: Option, + + #[clap( + short = 'Z', + long, + help = "IGNORED (for compatibility with built in rust test runner)" + )] + pub z_options: Option, + + #[clap( + long, + help = "IGNORED (for compatibility with built in rust test runner)" + )] + pub show_output: bool, + + #[clap( + long, + help = "Quits immediately, not listing anything (for compatibility with built-in rust test runner)" + )] + pub list: bool, + + #[clap( + long, + help = "IGNORED (for compatibility with built-in rust test runner)" + )] + pub ignored: bool, + + #[clap( + long, + help = "IGNORED (for compatibility with built-in rust test runner)" + )] + pub nocapture: bool, + + #[clap( + long, + help = "Number of threads used for running tests in parallel", + default_value_t = 16 + )] + pub test_threads: usize, +} diff --git a/vortex-sqllogictest/src/duckdb.rs b/vortex-sqllogictest/src/duckdb.rs new file mode 100644 index 00000000000..d96898b657c --- /dev/null +++ b/vortex-sqllogictest/src/duckdb.rs @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::process::Command; +use std::sync::Arc; +use std::time::Duration; + +use async_trait::async_trait; +use datafusion_sqllogictest::DFColumnType; +use indicatif::ProgressBar; +use sqllogictest::DBOutput; +use sqllogictest::runner::AsyncDB; +use vortex::error::VortexError; +use vortex_duckdb::LogicalType; +use vortex_duckdb::Value; +use vortex_duckdb::duckdb::Connection; +use vortex_duckdb::duckdb::Database; +use vortex_duckdb::register_table_functions; + +#[derive(Debug, thiserror::Error)] +pub enum DuckDBTestError { + Other(String), + Vortex(#[from] VortexError), +} + +impl std::fmt::Display for DuckDBTestError { + 
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + DuckDBTestError::Other(msg) => write!(f, "Other: {msg}"), + DuckDBTestError::Vortex(inner) => write!(f, "Vortex: {inner}"), + } + } +} + +struct Inner { + conn: Connection, + _db: Database, +} + +unsafe impl Send for Inner {} +unsafe impl Sync for Inner {} + +pub struct DuckDB { + inner: Arc, + pb: ProgressBar, +} + +impl DuckDB { + pub fn try_new(pb: ProgressBar) -> Result { + let db = Database::open_in_memory()?; + db.register_vortex_scan_replacement()?; + let conn = db.connect()?; + + register_table_functions(&conn)?; + Ok(Self { + pb, + inner: Arc::new(Inner { conn, _db: db }), + }) + } + + /// Turn the DuckDB logical type into a `DFColumnType`, which + /// tells the runner what types they are. We use the one from DataFusion + /// as its richer than the default one. + fn normalize_column_type(dtype: LogicalType) -> DFColumnType { + let type_id = dtype.as_type_id(); + if type_id == LogicalType::int32().as_type_id() + || type_id == LogicalType::int64().as_type_id() + || type_id == LogicalType::uint64().as_type_id() + { + DFColumnType::Integer + } else if type_id == LogicalType::varchar().as_type_id() { + DFColumnType::Text + } else if type_id == LogicalType::bool().as_type_id() { + DFColumnType::Boolean + } else if type_id == LogicalType::float32().as_type_id() + || type_id == LogicalType::float64().as_type_id() + { + DFColumnType::Float + } else { + DFColumnType::Another + } + } +} + +#[async_trait] +impl AsyncDB for DuckDB { + type Error = DuckDBTestError; + type ColumnType = DFColumnType; + + async fn run(&mut self, sql: &str) -> Result, Self::Error> { + let result = { + let r = self.inner.conn.query(sql)?; + + if r.column_count() == 0 && r.row_count() == 0 { + Ok(DBOutput::StatementComplete(0)) + } else { + let mut types = Vec::default(); + let mut rows = Vec::default(); + + for col_idx in 0..r.column_count() { + let col_idx = 
usize::try_from(col_idx).map_err(VortexError::from)?; + let dtype = r.column_type(col_idx); + types.push(Self::normalize_column_type(dtype)); + } + + for chunk in r.into_iter() { + for row_idx in 0..chunk.len() { + let mut current_row = Vec::new(); + for col_idx in 0..chunk.column_count() { + let vector = chunk.get_vector(col_idx); + match vector.get_value(row_idx, chunk.len()) { + Some(value) => current_row.push(value.to_string()), + None => current_row + .push(Value::null(&vector.logical_type()).to_string()), + } + } + + rows.push(current_row); + } + } + + Ok(DBOutput::Rows { types, rows }) + } + }; + + self.pb.inc(1); + + result + } + + async fn shutdown(&mut self) {} + + fn engine_name(&self) -> &str { + "DuckDB" + } + + async fn sleep(dur: Duration) { + tokio::time::sleep(dur).await + } + + async fn run_command(command: Command) -> std::io::Result { + tokio::process::Command::from(command).output().await + } +} diff --git a/vortex-sqllogictest/src/lib.rs b/vortex-sqllogictest/src/lib.rs new file mode 100644 index 00000000000..56964ba799a --- /dev/null +++ b/vortex-sqllogictest/src/lib.rs @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +pub mod args; +pub mod duckdb; +pub mod utils; diff --git a/vortex-sqllogictest/src/utils.rs b/vortex-sqllogictest/src/utils.rs new file mode 100644 index 00000000000..5d4e1bf2d93 --- /dev/null +++ b/vortex-sqllogictest/src/utils.rs @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::path::Path; +use std::path::PathBuf; + +use indicatif::ProgressStyle; + +pub fn list_files(path: impl AsRef) -> anyhow::Result> { + let mut file_paths = vec![]; + + list_files_impl(&mut file_paths, path)?; + + Ok(file_paths) +} + +fn list_files_impl(file_paths: &mut Vec, path: impl AsRef) -> anyhow::Result<()> { + let path = path.as_ref(); + + let read_dir = std::fs::read_dir(path)?; + for entry in 
read_dir { + let entry = entry?; + + if entry.metadata()?.is_dir() { + list_files_impl(file_paths, entry.path())?; + } else { + let path = entry.path(); + if path.extension().is_some_and(|ext| ext == "slt") { + file_paths.push(entry.path()); + } + } + } + + Ok(()) +} + +#[allow(clippy::unwrap_used)] +pub fn pb_style() -> ProgressStyle { + ProgressStyle::with_template("[{elapsed_precise}] {wide_bar} {pos:>7}/{len:7} {msg}") + .unwrap() + .progress_chars("##-") +}