diff --git a/.ai/skills/check-upstream/SKILL.md b/.ai/skills/check-upstream/SKILL.md index ac4835a4e..3bac018ef 100644 --- a/.ai/skills/check-upstream/SKILL.md +++ b/.ai/skills/check-upstream/SKILL.md @@ -29,6 +29,29 @@ You are auditing the datafusion-python project to find features from the upstrea **IMPORTANT: The Python API is the source of truth for coverage.** A function or method is considered "exposed" if it exists in the Python API (e.g., `python/datafusion/functions.py`), even if there is no corresponding entry in the Rust bindings. Many upstream functions are aliases of other functions — the Python layer can expose these aliases by calling a different underlying Rust binding. Do NOT report a function as missing if it appears in the Python `__all__` list and has a working implementation, regardless of whether a matching `#[pyfunction]` exists in Rust. +**IMPORTANT: audit the total upstream surface, not the delta since the last pin.** Gaps accumulate across syncs. A patch-release bump with a "bug fixes only" changelog does not mean there is nothing to find — pre-existing gaps from earlier majors still need to be surfaced. Always run the full comparison. + +## Compile-Signal Triggers + +If a recent upstream bump required *any* of the following while fixing +compile errors in `crates/core/` or the FFI example, treat that as a +**hard signal** that user-facing surface area grew and run this skill +before considering the bump done. 
Each pattern corresponds to a class of +gap that frequently shows up in the audit: + +| Signal during PR 1 compile fix | Likely gap to check | +|---|---| +| New `Expr::*` variant added to a non-exhaustive `match` (`HigherOrderFunction`, `Lambda`, `LambdaVariable`, …) | New lambda / higher-order scalar functions (`any_match`, `array_transform`, `list_transform`, …) | +| New `ScalarValue::*` variant (`ListView`, `LargeListView`, …) | New scalar / array functions that consume or produce the type | +| New required trait method on `ExecutionPlan` / `TableProvider` / `*UDFImpl` (`apply_expressions`, …) | Corresponding capability on the Python wrapper class | +| Renamed or restructured struct field (e.g. `Cast.data_type` → `Cast.field: FieldRef`) | Any Python accessor / SKILL.md doc that read the old field | +| Newly deprecated trait method with a `_with_args` / `_with_options` replacement | The `*_with_options` variant frequently warrants a separate Python entry point | + +PR 1 of `dev/release/upstream-sync.md` asks you to log these signals as +they appear. When you run this skill, use that log as a checklist: every +entry must either show up in the audit output or be explicitly skipped +with a reason. + ## Areas to Check The user may specify an area via `$ARGUMENTS`. If no area is specified or "all" is given, check all areas. @@ -173,6 +196,28 @@ These upstream FFI types have been reviewed and do not need to be independently - FFI example in `examples/datafusion-ffi-example/` - Type appears in union type hints where accepted +### 8. `__all__` Hygiene (functions.py) + +Independent of upstream parity, also flag public `def` symbols in +`python/datafusion/functions.py` that are missing from the module's +`__all__`. These are functions a user can call but that do not show up in +`from datafusion.functions import *`, in tab-completion against the +namespace, or in generated API docs — typically an oversight rather than +an intentional omission. + +**How to check:** +1. 
Grep for `^def ([a-z_][a-z0-9_]*)\(` in `python/datafusion/functions.py` + to enumerate every top-level function definition (public and private). +2. Read the `__all__` list at the top of the same file. +3. Report any function in (1) that is not in (2). Skip private helpers + (names starting with `_`). + +A historical example: `instr` and `position` shipped as public `def`s but +were absent from `__all__` until the gap was caught here. + +For each finding, propose adding the name to `__all__` in alphabetical +position with the existing entries. + ## Checking for Existing GitHub Issues After identifying missing APIs, search the open issues at https://github.com/apache/datafusion-python/issues for each gap to see if an issue already exists requesting that API be exposed. Search using the function or method name as the query. diff --git a/Cargo.lock b/Cargo.lock index 1d148b0e1..0c4b77582 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,54 +2,6 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "abi_stable" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d6512d3eb05ffe5004c59c206de7f99c34951504056ce23fc953842f12c445" -dependencies = [ - "abi_stable_derive", - "abi_stable_shared", - "const_panic", - "core_extensions", - "crossbeam-channel", - "generational-arena", - "libloading", - "lock_api", - "parking_lot", - "paste", - "repr_offset", - "rustc_version", - "serde", - "serde_derive", - "serde_json", -] - -[[package]] -name = "abi_stable_derive" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7178468b407a4ee10e881bc7a328a65e739f0863615cca4429d43916b05e898" -dependencies = [ - "abi_stable_shared", - "as_derive_utils", - "core_extensions", - "proc-macro2", - "quote", - "rustc_version", - "syn 1.0.109", - "typed-arena", -] - -[[package]] -name = "abi_stable_shared" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b2b5df7688c123e63f4d4d649cba63f2967ba7f7861b1664fca3f77d3dad2b63" -dependencies = [ - "core_extensions", -] - [[package]] name = "adler2" version = "2.0.1" @@ -115,35 +67,6 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" -[[package]] -name = "apache-avro" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf" -dependencies = [ - "bigdecimal", - "bon", - "bzip2", - "crc32fast", - "digest", - "liblzma", - "log", - "miniz_oxide", - "num-bigint", - "quad-rand", - "rand 0.9.4", - "regex-lite", - "serde", - "serde_bytes", - "serde_json", - "snap", - "strum", - "strum_macros", - "thiserror", - "uuid", - "zstd", -] - [[package]] name = "ar_archive_writer" version = "0.5.1" @@ -176,9 +99,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d441fdda254b65f3e9025910eb2c2066b6295d9c8ed409522b8d2ace1ff8574c" +checksum = "378530e55cd479eda3c14eb345310799717e6f76d0c332041e8487022166b471" dependencies = [ "arrow-arith", "arrow-array", @@ -198,9 +121,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced5406f8b720cc0bc3aa9cf5758f93e8593cda5490677aa194e4b4b383f9a59" +checksum = "a0ab212d2c1886e802f51c5212d78ebbcbb0bec980fff9dadc1eb8d45cd0b738" dependencies = [ "arrow-array", "arrow-buffer", @@ -229,6 +152,30 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arrow-avro" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "049230728cd6e093088c8d231b4beede184e35cad7777c1505c0d5a8571f4376" +dependencies = [ + 
"arrow-array", + "arrow-buffer", + "arrow-schema", + "bytes", + "bzip2", + "crc", + "flate2", + "indexmap", + "liblzma", + "rand 0.9.2", + "serde", + "serde_json", + "snap", + "strum_macros", + "uuid", + "zstd", +] + [[package]] name = "arrow-buffer" version = "58.3.0" @@ -243,9 +190,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0127816c96533d20fc938729f48c52d3e48f99717e7a0b5ade77d742510736d" +checksum = "4c5aefb56a2c02e9e2b30746241058b85f8983f0fcff2ba0c6d09006e1cded7f" dependencies = [ "arrow-array", "arrow-buffer", @@ -265,9 +212,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca025bd0f38eeecb57c2153c0123b960494138e6a957bbda10da2b25415209fe" +checksum = "e94e8cf7e517657a52b91ea1263acf38c4ca62a84655d72458a3359b12ab97de" dependencies = [ "arrow-array", "arrow-cast", @@ -293,9 +240,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "609a441080e338147a84e8e6904b6da482cefb957c5cdc0f3398872f69a315d0" +checksum = "238438f0834483703d88896db6fe5a7138b2230debc31b34c0336c2996e3c64f" dependencies = [ "arrow-array", "arrow-buffer", @@ -309,15 +256,16 @@ dependencies = [ [[package]] name = "arrow-json" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ead0914e4861a531be48fe05858265cf854a4880b9ed12618b1d08cba9bebc8" +checksum = "205ca2119e6d679d5c133c6f30e68f027738d95ed948cf77677ea69c7800036b" dependencies = [ "arrow-array", "arrow-buffer", "arrow-cast", - "arrow-data", + "arrow-ord", "arrow-schema", + "arrow-select", "chrono", "half", "indexmap", @@ -333,9 +281,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "58.1.0" +version = 
"58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "763a7ba279b20b52dad300e68cfc37c17efa65e68623169076855b3a9e941ca5" +checksum = "1bffd8fd2579286a5d63bac898159873e5094a79009940bcb42bbfce4f19f1d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -346,9 +294,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e63351dc11981a316c828a6032a5021345bba882f68bc4a36c36825a50725089" +checksum = "d29abdf672a81c1aeb57fd2661457f9918964d49aed0e9f18932535f2a9e49ce" dependencies = [ "arrow-array", "arrow-data", @@ -358,9 +306,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14fe367802f16d7668163ff647830258e6e0aeea9a4d79aaedf273af3bdcd3e" +checksum = "bab5994731204603c73ba69267616c50f80780774c6bb0476f1f830625115e0c" dependencies = [ "arrow-array", "arrow-buffer", @@ -382,9 +330,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78694888660a9e8ac949853db393af2a8b8fc82c19ce333132dfa2e72cc1a7fe" +checksum = "8cd065c54172ac787cf3f2f8d4107e0d3fdc26edba76fdf4f4cc170258942222" dependencies = [ "ahash", "arrow-array", @@ -396,9 +344,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e04a01f8bb73ce54437514c5fd3ee2aa3e8abe4c777ee5cc55853b1652f79e" +checksum = "29dd7cda3ab9692f43a2e4acc444d760cc17b12bb6d8232ddf64e9bab7c06b42" dependencies = [ "arrow-array", "arrow-buffer", @@ -411,18 +359,6 @@ dependencies = [ "regex-syntax", ] -[[package]] -name = "as_derive_utils" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ff3c96645900a44cf11941c111bd08a6573b0e2f9f69bc9264b179d8fae753c4" -dependencies = [ - "core_extensions", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "async-compression" version = "0.4.41" @@ -440,9 +376,6 @@ name = "async-ffi" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4de21c0feef7e5a556e51af767c953f0501f7f300ba785cc99c47bdc8081a50" -dependencies = [ - "abi_stable", -] [[package]] name = "async-recursion" @@ -504,7 +437,6 @@ dependencies = [ "num-bigint", "num-integer", "num-traits", - "serde", ] [[package]] @@ -519,7 +451,7 @@ version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" dependencies = [ - "digest", + "digest 0.10.7", ] [[package]] @@ -546,28 +478,12 @@ dependencies = [ ] [[package]] -name = "bon" -version = "3.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" -dependencies = [ - "bon-macros", - "rustversion", -] - -[[package]] -name = "bon-macros" -version = "3.9.1" +name = "block-buffer" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" +checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" dependencies = [ - "darling", - "ident_case", - "prettyplease", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.117", + "hybrid-array", ] [[package]] @@ -715,6 +631,12 @@ version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + [[package]] 
name = "const-random" version = "0.1.18" @@ -735,15 +657,6 @@ dependencies = [ "tiny-keccak", ] -[[package]] -name = "const_panic" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e262cdaac42494e3ae34c43969f9cdeb7da178bdb4b66fa6a1ea2edb4c8ae652" -dependencies = [ - "typewit", -] - [[package]] name = "constant_time_eq" version = "0.4.2" @@ -766,21 +679,6 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" -[[package]] -name = "core_extensions" -version = "1.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42bb5e5d0269fd4f739ea6cedaf29c16d81c27a7ce7582008e90eb50dcd57003" -dependencies = [ - "core_extensions_proc_macros", -] - -[[package]] -name = "core_extensions_proc_macros" -version = "1.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533d38ecd2709b7608fb8e18e4504deb99e9a72879e6aa66373a76d8dc4259ea" - [[package]] name = "cpufeatures" version = "0.2.17" @@ -800,21 +698,27 @@ dependencies = [ ] [[package]] -name = "crc32fast" -version = "1.5.0" +name = "crc" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" dependencies = [ - "cfg-if", + "crc-catalog", ] [[package]] -name = "crossbeam-channel" -version = "0.5.15" +name = "crc-catalog" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +checksum = "217698eaf96b4a3f0bc4f3662aaa55bdf913cd54d7204591faa790070c6d0853" + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ - "crossbeam-utils", + "cfg-if", ] [[package]] @@ -839,6 +743,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "crypto-common" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77727bb15fa921304124b128af125e7e3b968275d1b108b379190264f4423710" +dependencies = [ + "hybrid-array", +] + [[package]] name = "cstr" version = "0.2.12" @@ -870,40 +783,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "darling" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" -dependencies = [ - "darling_core", - "darling_macro", -] - -[[package]] -name = "darling_core" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" -dependencies = [ - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn 2.0.117", -] - -[[package]] -name = "darling_macro" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" -dependencies = [ - "darling_core", - "quote", - "syn 2.0.117", -] - [[package]] name = "dashmap" version = "6.1.0" @@ -921,13 +800,11 @@ dependencies = [ [[package]] name = "datafusion" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93db0e623840612f7f2cd757f7e8a8922064192363732c88692e0870016e141b" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "arrow-schema", "async-trait", - "bytes", "bzip2", "chrono", "datafusion-catalog", @@ -958,14 +835,13 @@ dependencies = [ "datafusion-sql", "flate2", "futures", + "indexmap", "itertools", "liblzma", "log", 
"object_store", "parking_lot", "parquet", - "rand 0.9.4", - "regex", "sqlparser", "tempfile", "tokio", @@ -977,8 +853,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37cefde60b26a7f4ff61e9d2ff2833322f91df2b568d7238afe67bde5bdffb66" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "async-trait", @@ -1002,8 +877,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17e112307715d6a7a331111a4c2330ff54bc237183511c319e3708a4cff431fb" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "async-trait", @@ -1025,34 +899,32 @@ dependencies = [ [[package]] name = "datafusion-common" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d72a11ca44a95e1081870d3abb80c717496e8a7acb467a1d3e932bb636af5cc2" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ - "ahash", - "apache-avro", "arrow", "arrow-ipc", + "arrow-schema", "chrono", + "foldhash 0.2.0", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "indexmap", "itertools", "libc", "log", "object_store", "parquet", - "paste", "recursive", "sqlparser", "tokio", + "uuid", "web-time", ] [[package]] name = "datafusion-common-runtime" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89f4afaed29670ec4fd6053643adc749fe3f4bc9d1ce1b8c5679b22c67d12def" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" 
dependencies = [ "futures", "log", @@ -1062,8 +934,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9fb386e1691355355a96419978a0022b7947b44d4a24a6ea99f00b6b485cbb6" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "async-compression", @@ -1087,7 +958,8 @@ dependencies = [ "liblzma", "log", "object_store", - "rand 0.9.4", + "parking_lot", + "rand 0.9.2", "tokio", "tokio-util", "url", @@ -1097,8 +969,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffa6c52cfed0734c5f93754d1c0175f558175248bf686c944fb05c373e5fc096" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "arrow-ipc", @@ -1121,28 +992,25 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a579c3bd290c66ea4b269493e75e8a3ed42c9c895a651f10210a29538aee50c4" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ - "apache-avro", "arrow", + "arrow-avro", "async-trait", "bytes", "datafusion-common", "datafusion-datasource", - "datafusion-physical-expr-common", + "datafusion-physical-expr-adapter", "datafusion-physical-plan", "datafusion-session", "futures", - "num-traits", "object_store", ] [[package]] name = "datafusion-datasource-csv" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "503f29e0582c1fc189578d665ff57d9300da1f80c282777d7eb67bb79fb8cdca" +source = 
"git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "async-trait", @@ -1164,8 +1032,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33804749abc8d0c8cb7473228483cb8070e524c6f6086ee1b85a64debe2b3d2" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "async-trait", @@ -1180,7 +1047,6 @@ dependencies = [ "datafusion-session", "futures", "object_store", - "serde_json", "tokio", "tokio-stream", ] @@ -1188,8 +1054,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a8e0365e0e08e8ff94d912f0ababcf9065a1a304018ba90b1fc83c855b4997" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "async-trait", @@ -1199,6 +1064,7 @@ dependencies = [ "datafusion-datasource", "datafusion-execution", "datafusion-expr", + "datafusion-functions", "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-adapter", @@ -1218,19 +1084,16 @@ dependencies = [ [[package]] name = "datafusion-doc" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de6ac0df1662b9148ad3c987978b32cbec7c772f199b1d53520c8fa764a87ee" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" [[package]] name = "datafusion-execution" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c03c7fbdaefcca4ef6ffe425a5fc2325763bfb426599bb0bf4536466efabe709" +source = 
"git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "arrow-buffer", "async-trait", - "chrono", "dashmap", "datafusion-common", "datafusion-expr", @@ -1239,7 +1102,7 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand 0.9.4", + "rand 0.9.2", "tempfile", "url", ] @@ -1247,10 +1110,10 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "574b9b6977fedbd2a611cbff12e5caf90f31640ad9dc5870f152836d94bad0dd" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", + "arrow-schema", "async-trait", "chrono", "datafusion-common", @@ -1261,7 +1124,6 @@ dependencies = [ "datafusion-physical-expr-common", "indexmap", "itertools", - "paste", "recursive", "serde_json", "sqlparser", @@ -1270,27 +1132,24 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d7c3adf3db8bf61e92eb90cb659c8e8b734593a8f7c8e12a843c7ddba24b87e" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "datafusion-common", "indexmap", "itertools", - "paste", ] [[package]] name = "datafusion-ffi" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b95173344d04ba62755c949bf44f8d1a6e4414cf6392a635db96c07e711b9a3c" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ - "abi_stable", "arrow", "arrow-schema", "async-ffi", "async-trait", + "chrono", "datafusion-catalog", "datafusion-common", "datafusion-datasource", @@ -1299,14 +1158,17 
@@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-common", + "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-proto", "datafusion-proto-common", "datafusion-session", "futures", + "libloading", "log", "prost", "semver", + "stabby", "tokio", ] @@ -1335,8 +1197,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28aa4e10384e782774b10e72aca4d93ef7b31aa653095d9d4536b0a3dbc51b6" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "arrow-buffer", @@ -1351,26 +1212,24 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-macros", + "datafusion-physical-expr-common", "hex", "itertools", "log", - "md-5", + "md-5 0.11.0", "memchr", "num-traits", - "rand 0.9.4", + "rand 0.9.2", "regex", "sha2", - "unicode-segmentation", "uuid", ] [[package]] name = "datafusion-functions-aggregate" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00aa6217e56098ba84e0a338176fe52f0a84cca398021512c6c8c5eff806d0ad" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ - "ahash", "arrow", "datafusion-common", "datafusion-doc", @@ -1380,19 +1239,17 @@ dependencies = [ "datafusion-macros", "datafusion-physical-expr", "datafusion-physical-expr-common", + "foldhash 0.2.0", "half", "log", "num-traits", - "paste", ] [[package]] name = "datafusion-functions-aggregate-common" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b511250349407db7c43832ab2de63f5557b19a20dfd236b39ca2c04468b50d47" +source = 
"git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ - "ahash", "arrow", "datafusion-common", "datafusion-expr-common", @@ -1402,8 +1259,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef13a858e20d50f0a9bb5e96e7ac82b4e7597f247515bccca4fdd2992df0212a" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "arrow-ord", @@ -1417,18 +1273,17 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "itertools", "itoa", "log", - "paste", + "memchr", ] [[package]] name = "datafusion-functions-table" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b40d3f5bbb3905f9ccb1ce9485a9595c77b69758a7c24d3ba79e334ff51e7e" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "async-trait", @@ -1437,14 +1292,12 @@ dependencies = [ "datafusion-expr", "datafusion-physical-plan", "parking_lot", - "paste", ] [[package]] name = "datafusion-functions-window" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4e88ec9d57c9b685d02f58bfee7be62d72610430ddcedb82a08e5d9925dbfb6" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "datafusion-common", @@ -1455,14 +1308,12 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "log", - "paste", ] [[package]] name = "datafusion-functions-window-common" version = "53.1.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8307bb93519b1a91913723a1130cfafeee3f72200d870d88e91a6fc5470ede5c" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1471,8 +1322,7 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e367e6a71051d0ebdd29b2f85d12059b38b1d1f172c6906e80016da662226bd" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "datafusion-doc", "quote", @@ -1482,8 +1332,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e929015451a67f77d9d8b727b2bf3a40c4445fdef6cdc53281d7d97c76888ace" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "chrono", @@ -1502,10 +1351,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b1e68aba7a4b350401cfdf25a3d6f989ad898a7410164afe9ca52080244cb59" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ - "ahash", "arrow", "datafusion-common", "datafusion-expr", @@ -1513,11 +1360,10 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "indexmap", "itertools", "parking_lot", - "paste", "petgraph", "recursive", "tokio", @@ -1526,8 +1372,7 @@ dependencies = [ [[package]] name = 
"datafusion-physical-expr-adapter" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea22315f33cf2e0adc104e8ec42e285f6ed93998d565c65e82fec6a9ee9f9db4" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "datafusion-common", @@ -1541,25 +1386,23 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b04b45ea8ad3ac2d78f2ea2a76053e06591c9629c7a603eda16c10649ecf4362" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ - "ahash", "arrow", "chrono", "datafusion-common", "datafusion-expr-common", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "indexmap", "itertools", "parking_lot", + "pin-project", ] [[package]] name = "datafusion-physical-optimizer" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cb13397809a425918f608dfe8653f332015a3e330004ab191b4404187238b95" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "datafusion-common", @@ -1577,11 +1420,11 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5edc023675791af9d5fb4cc4c24abf5f7bd3bd4dcf9e5bd90ea1eff6976dcc79" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ - "ahash", "arrow", + "arrow-data", + "arrow-ipc", "arrow-ord", "arrow-schema", "async-trait", @@ -1596,7 +1439,7 @@ dependencies = [ "datafusion-physical-expr-common", "futures", "half", - "hashbrown 0.16.1", + 
"hashbrown 0.17.1", "indexmap", "itertools", "log", @@ -1609,8 +1452,7 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a387aaef949dc16bb6abc81bd1af850ec7449183aef011214f9724957495738" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "chrono", @@ -1631,14 +1473,12 @@ dependencies = [ "datafusion-proto-common", "object_store", "prost", - "rand 0.9.4", ] [[package]] name = "datafusion-proto-common" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16e614c7c53a9c304c6a850b821010bb492e57300311835f1180613f9d2c63d9" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "datafusion-common", @@ -1648,8 +1488,7 @@ dependencies = [ [[package]] name = "datafusion-pruning" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac8c76860e355616555081cab5968cec1af7a80701ff374510860bcd567e365a" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "datafusion-common", @@ -1658,7 +1497,6 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", - "itertools", "log", ] @@ -1709,8 +1547,7 @@ dependencies = [ [[package]] name = "datafusion-session" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5412111aa48e2424ba926112e192f7a6b7e4ccb450145d25ce5ede9f19dc491e" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "async-trait", "datafusion-common", @@ 
-1723,8 +1560,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa0d133ddf8b9b3b872acac900157f783e7b879fe9a6bccf389abebbfac45ec1" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "arrow", "bigdecimal", @@ -1741,9 +1577,8 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "53.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5e5656a7e63d51dd3e5af3dbd347ea83bbe993a77c66b854b74961570d16490" +version = "53.1.0" +source = "git+https://github.com/apache/datafusion?rev=47655fd6c9ef060d73497987e6ccb98e57196508#47655fd6c9ef060d73497987e6ccb98e57196508" dependencies = [ "async-recursion", "async-trait", @@ -1765,11 +1600,22 @@ version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "block-buffer", - "crypto-common", + "block-buffer 0.10.4", + "crypto-common 0.1.7", "subtle", ] +[[package]] +name = "digest" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" +dependencies = [ + "block-buffer 0.12.0", + "const-oid", + "crypto-common 0.2.1", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -1963,15 +1809,6 @@ dependencies = [ "slab", ] -[[package]] -name = "generational-arena" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877e94aff08e743b651baaea359664321055749b398adff8740a7399af7796e7" -dependencies = [ - "cfg-if", -] - [[package]] name = "generic-array" version = "0.14.7" @@ -2091,6 +1928,11 @@ name = "hashbrown" version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] [[package]] name = "heck" @@ -2149,6 +1991,15 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" +[[package]] +name = "hybrid-array" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da" +dependencies = [ + "typenum", +] + [[package]] name = "hyper" version = "1.8.1" @@ -2322,12 +2173,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" -[[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" - [[package]] name = "idna" version = "1.1.0" @@ -2351,12 +2196,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.13.0" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "serde", "serde_core", ] @@ -2489,18 +2334,18 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.183" +version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" [[package]] name = "libloading" -version = "0.7.4" +version = "0.9.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" dependencies = [ "cfg-if", - "winapi", + "windows-link", ] [[package]] @@ -2531,11 +2376,12 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libmimalloc-sys" -version = "0.1.47" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d1eacfa31c33ec25e873c136ba5669f00f9866d0688bea7be4d3f7e43067df6" +checksum = "667f4fec20f29dfc6bc7357c582d91796c169ad7e2fce709468aefeb2c099870" dependencies = [ "cc", + "libc", ] [[package]] @@ -2587,7 +2433,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ "cfg-if", - "digest", + "digest 0.10.7", +] + +[[package]] +name = "md-5" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69b6441f590336821bb897fb28fc622898ccceb1d6cea3fde5ea86b090c4de98" +dependencies = [ + "cfg-if", + "digest 0.11.3", ] [[package]] @@ -2598,9 +2454,9 @@ checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "mimalloc" -version = "0.1.50" +version = "0.1.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3627c4272df786b9260cabaa46aec1d59c93ede723d4c3ef646c503816b0640" +checksum = "e1ee66a4b64c74f4ef288bcbb9192ad9c3feaad75193129ac8509af543894fd8" dependencies = [ "libmimalloc-sys", ] @@ -2640,7 +2496,6 @@ checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", - "serde", ] [[package]] @@ -2700,7 +2555,7 @@ dependencies = [ "humantime", "hyper", "itertools", - "md-5", + "md-5 0.10.6", "parking_lot", "percent-encoding", "quick-xml", @@ -2766,9 +2621,9 @@ 
dependencies = [ [[package]] name = "parquet" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d3f9f2205199603564127932b89695f52b62322f541d0fc7179d57c2e1c9877" +checksum = "5dafa7d01085b62a47dd0c1829550a0a36710ea9c4fe358a05a85477cec8a908" dependencies = [ "ahash", "arrow-array", @@ -2784,7 +2639,7 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "lz4_flex", "num-bigint", "num-integer", @@ -2879,6 +2734,26 @@ dependencies = [ "siphasher", ] +[[package]] +name = "pin-project" +version = "1.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf0d9e68100b3a7989b4901972f265cd542e560a3a8a724e1e20322f4d06ce9" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a990e22f43e84855daf260dded30524ef4a9021cc7541c26540500a50b624389" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "pin-project-lite" version = "0.2.17" @@ -2931,6 +2806,15 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "proc-macro-crate" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" +dependencies = [ + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -3093,12 +2977,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "quad-rand" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" - [[package]] name = "quick-xml" version = "0.39.2" @@ -3138,7 +3016,7 @@ dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand 0.9.4", + "rand 0.9.2", "ring", "rustc-hash", "rustls", @@ -3187,11 +3065,22 @@ checksum = 
"f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" [[package]] name = "rand" -version = "0.9.4" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ - "rand_chacha", + "rand_chacha 0.9.0", "rand_core 0.9.5", ] @@ -3206,6 +3095,16 @@ dependencies = [ "rand_core 0.10.0", ] +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + [[package]] name = "rand_chacha" version = "0.9.0" @@ -3216,6 +3115,15 @@ dependencies = [ "rand_core 0.9.5", ] +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + [[package]] name = "rand_core" version = "0.9.5" @@ -3283,12 +3191,6 @@ dependencies = [ "regex-syntax", ] -[[package]] -name = "regex-lite" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" - [[package]] name = "regex-syntax" version = "0.8.10" @@ -3305,15 +3207,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "repr_offset" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb1070755bd29dffc19d0971cab794e607839ba2ef4b69a9e6fbc8733c1b72ea" 
-dependencies = [ - "tstr", -] - [[package]] name = "reqwest" version = "0.12.28" @@ -3436,9 +3329,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.13" +version = "0.103.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" +checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" dependencies = [ "ring", "rustls-pki-types", @@ -3530,9 +3423,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" dependencies = [ "serde", "serde_core", @@ -3554,16 +3447,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde_bytes" -version = "0.11.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" -dependencies = [ - "serde", - "serde_core", -] - [[package]] name = "serde_core" version = "1.0.228" @@ -3601,6 +3484,7 @@ version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ + "indexmap", "itoa", "memchr", "serde", @@ -3647,15 +3531,21 @@ dependencies = [ [[package]] name = "sha2" -version = "0.10.9" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" dependencies = [ "cfg-if", - "cpufeatures 0.2.17", - "digest", + "cpufeatures 0.3.0", + "digest 0.11.3", ] +[[package]] +name = "sha2-const-stable" +version = "0.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f179d4e11094a893b82fff208f74d448a7512f99f5a0acbd5c679b705f83ed9" + [[package]] name = "shlex" version = "1.3.0" @@ -3710,9 +3600,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.61.0" +version = "0.62.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" +checksum = "13c6d1b651dc4edf07eead2a0c6c78016ce971bc2c10da5266861b13f25e7cec" dependencies = [ "log", "recursive", @@ -3730,6 +3620,41 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "stabby" +version = "72.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "976399a0c48ea769ef7f5dc303bb88240ab8d84008647a6b2303eced3dab3945" +dependencies = [ + "rustversion", + "stabby-abi", +] + +[[package]] +name = "stabby-abi" +version = "72.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7b54832a9a1f92a0e55e74a5c0332744426edc515bb3fbad82f10b874a87f0d" +dependencies = [ + "rustc_version", + "rustversion", + "sha2-const-stable", + "stabby-macros", +] + +[[package]] +name = "stabby-macros" +version = "72.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a768b1e51e4dbfa4fa52ae5c01241c0a41e2938fdffbb84add0c8238092f9091" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "rand 0.8.6", + "syn 1.0.109", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -3749,23 +3674,11 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "strsim" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" - -[[package]] -name = "strum" -version = "0.27.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" - [[package]] 
name = "strum_macros" -version = "0.27.2" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +checksum = "ab85eea0270ee17587ed4156089e10b9e6880ee688791d45a905f5b1ca36f664" dependencies = [ "heck", "proc-macro2", @@ -3775,11 +3688,12 @@ dependencies = [ [[package]] name = "substrait" -version = "0.62.2" +version = "0.63.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62fc4b483a129b9772ccb9c3f7945a472112fdd9140da87f8a4e7f1d44e045d0" +checksum = "e620ff4d5c02fd6f7752931aa74b16a26af66a63022cc1ad412c77edbe0bab47" dependencies = [ "heck", + "indexmap", "pbjson", "pbjson-build", "pbjson-types", @@ -3992,6 +3906,36 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml_datetime" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.25.11+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.1.2+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" +dependencies = [ + "winnow", +] + [[package]] name = "tower" version = "0.5.3" @@ -4074,44 +4018,17 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "tstr" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7f8e0294f14baae476d0dd0a2d780b2e24d66e349a9de876f5126777a37bdba7" -dependencies = [ - "tstr_proc_macros", -] - -[[package]] -name = "tstr_proc_macros" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e78122066b0cb818b8afd08f7ed22f7fdbc3e90815035726f0840d0d26c0747a" - [[package]] name = "twox-hash" version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" -[[package]] -name = "typed-arena" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" - [[package]] name = "typenum" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" - -[[package]] -name = "typewit" -version = "1.14.2" +version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c1ae7cc0fdb8b842d65d127cb981574b0d2b249b74d1c7a2986863dc134f71" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" [[package]] name = "typify" @@ -4216,13 +4133,12 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.23.1" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" +checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" dependencies = [ "getrandom 0.4.2", "js-sys", - "serde_core", "wasm-bindgen", ] @@ -4401,22 +4317,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - 
"winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - [[package]] name = "winapi-util" version = "0.1.11" @@ -4426,12 +4326,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-core" version = "0.62.2" @@ -4656,6 +4550,15 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winnow" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ee1708bef14716a11bae175f579062d4554d95be2c6829f518df847b7b3fdd0" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen" version = "0.51.0" diff --git a/Cargo.toml b/Cargo.toml index 077bc093f..13d7040a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,3 +71,12 @@ codegen-units = 2 # We cannot publish to crates.io with any patches in the below section. Developers # must remove any entries in this section before creating a release candidate. 
[patch.crates-io] +datafusion = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" } +datafusion-substrait = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" } +datafusion-proto = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" } +datafusion-ffi = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" } +datafusion-catalog = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" } +datafusion-common = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" } +datafusion-functions-aggregate = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" } +datafusion-functions-window = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" } +datafusion-expr = { git = "https://github.com/apache/datafusion", rev = "47655fd6c9ef060d73497987e6ccb98e57196508" } diff --git a/crates/core/src/catalog.rs b/crates/core/src/catalog.rs index 30ec4744c..8ad49b098 100644 --- a/crates/core/src/catalog.rs +++ b/crates/core/src/catalog.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; use std::collections::HashSet; use std::ptr::NonNull; use std::sync::Arc; @@ -143,15 +142,12 @@ impl PyCatalogList { "Schema with name {name} doesn't exist." 
)))?; - Python::attach(|py| { - match catalog - .as_any() - .downcast_ref::() - { + Python::attach( + |py| match catalog.downcast_ref::() { Some(wrapped_catalog) => Ok(wrapped_catalog.catalog_provider.clone_ref(py)), None => PyCatalog::new_from_parts(catalog, self.codec.clone()).into_py_any(py), - } - }) + }, + ) } pub fn register_catalog(&self, name: &str, catalog_provider: Bound<'_, PyAny>) -> PyResult<()> { @@ -201,15 +197,12 @@ impl PyCatalog { "Schema with name {name} doesn't exist." )))?; - Python::attach(|py| { - match schema - .as_any() - .downcast_ref::() - { + Python::attach( + |py| match schema.downcast_ref::() { Some(wrapped_schema) => Ok(wrapped_schema.schema_provider.clone_ref(py)), None => PySchema::new_from_parts(schema, self.codec.clone()).into_py_any(py), - } - }) + }, + ) } pub fn register_schema(&self, name: &str, schema_provider: Bound<'_, PyAny>) -> PyResult<()> { @@ -356,10 +349,6 @@ impl SchemaProvider for RustWrappedPySchemaProvider { self.owner_name.as_deref() } - fn as_any(&self) -> &dyn Any { - self - } - fn table_names(&self) -> Vec { Python::attach(|py| { let provider = self.schema_provider.bind(py); @@ -465,10 +454,6 @@ impl RustWrappedPyCatalogProvider { #[async_trait] impl CatalogProvider for RustWrappedPyCatalogProvider { - fn as_any(&self) -> &dyn Any { - self - } - fn schema_names(&self) -> Vec { Python::attach(|py| { let provider = self.catalog_provider.bind(py); @@ -496,10 +481,7 @@ impl CatalogProvider for RustWrappedPyCatalogProvider { schema: Arc, ) -> datafusion::common::Result>> { Python::attach(|py| { - let py_schema = match schema - .as_any() - .downcast_ref::() - { + let py_schema = match schema.downcast_ref::() { Some(wrapped_schema) => wrapped_schema.schema_provider.as_any(), None => &PySchema::new_from_parts(schema, self.codec.clone()) .into_py_any(py) @@ -573,10 +555,6 @@ impl RustWrappedPyCatalogProviderList { #[async_trait] impl CatalogProviderList for RustWrappedPyCatalogProviderList { - fn as_any(&self) -> &dyn 
Any { - self - } - fn catalog_names(&self) -> Vec { Python::attach(|py| { let provider = self.catalog_provider_list.bind(py); @@ -604,10 +582,7 @@ impl CatalogProviderList for RustWrappedPyCatalogProviderList { catalog: Arc, ) -> Option> { Python::attach(|py| { - let py_catalog = match catalog - .as_any() - .downcast_ref::() - { + let py_catalog = match catalog.downcast_ref::() { Some(wrapped_schema) => wrapped_schema.catalog_provider.as_any().clone_ref(py), None => { match PyCatalog::new_from_parts(catalog, self.codec.clone()).into_py_any(py) { @@ -661,8 +636,8 @@ fn extract_catalog_provider_from_pyobj( .pointer_checked(Some(c"datafusion_catalog_provider"))? .cast(); let provider = unsafe { data.as_ref() }; - let provider: Arc = provider.into(); - provider as Arc + let provider: Arc = provider.into(); + provider } else { match catalog_provider.extract::() { Ok(py_catalog) => py_catalog.catalog, @@ -693,8 +668,8 @@ fn extract_schema_provider_from_pyobj( .pointer_checked(Some(c"datafusion_schema_provider"))? 
.cast(); let provider = unsafe { data.as_ref() }; - let provider: Arc = provider.into(); - provider as Arc + let provider: Arc = provider.into(); + provider } else { match schema_provider.extract::() { Ok(py_schema) => py_schema.schema, diff --git a/crates/core/src/common/data_type.rs b/crates/core/src/common/data_type.rs index af4179806..e79aea4ef 100644 --- a/crates/core/src/common/data_type.rs +++ b/crates/core/src/common/data_type.rs @@ -334,6 +334,9 @@ impl DataTypeMap { Ok(DataType::Interval(IntervalUnit::MonthDayNano)) } ScalarValue::List(arr) => Ok(arr.data_type().to_owned()), + ScalarValue::LargeList(arr) => Ok(arr.data_type().to_owned()), + ScalarValue::ListView(arr) => Ok(arr.data_type().to_owned()), + ScalarValue::LargeListView(arr) => Ok(arr.data_type().to_owned()), ScalarValue::Struct(_fields) => Err(PyNotImplementedError::new_err( "ScalarValue::Struct".to_string(), )), @@ -346,9 +349,6 @@ impl DataTypeMap { "ScalarValue::FixedSizeList".to_string(), )) } - ScalarValue::LargeList(_) => Err(PyNotImplementedError::new_err( - "ScalarValue::LargeList".to_string(), - )), ScalarValue::DurationSecond(_) => Ok(DataType::Duration(TimeUnit::Second)), ScalarValue::DurationMillisecond(_) => Ok(DataType::Duration(TimeUnit::Millisecond)), ScalarValue::DurationMicrosecond(_) => Ok(DataType::Duration(TimeUnit::Microsecond)), diff --git a/crates/core/src/common/schema.rs b/crates/core/src/common/schema.rs index 29a27b204..94b3ce0ae 100644 --- a/crates/core/src/common/schema.rs +++ b/crates/core/src/common/schema.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. 
-use std::any::Any; use std::borrow::Cow; use std::fmt::{self, Display, Formatter}; use std::sync::Arc; @@ -219,10 +218,6 @@ impl SqlTableSource { /// Implement TableSource, used in the logical query plan and in logical query optimizations impl TableSource for SqlTableSource { - fn as_any(&self) -> &dyn Any { - self - } - fn schema(&self) -> SchemaRef { self.schema.clone() } diff --git a/crates/core/src/context.rs b/crates/core/src/context.rs index 96de01889..642afeef7 100644 --- a/crates/core/src/context.rs +++ b/crates/core/src/context.rs @@ -773,8 +773,8 @@ impl PySessionContext { .pointer_checked(Some(c"datafusion_catalog_provider_list"))? .cast(); let provider = unsafe { data.as_ref() }; - let provider: Arc = provider.into(); - provider as Arc + let provider: Arc = provider.into(); + provider } else { match provider.extract::() { Ok(py_catalog_list) => py_catalog_list.catalog_list, @@ -809,8 +809,8 @@ impl PySessionContext { .pointer_checked(Some(c"datafusion_catalog_provider"))? .cast(); let provider = unsafe { data.as_ref() }; - let provider: Arc = provider.into(); - provider as Arc + let provider: Arc = provider.into(); + provider } else { match provider.extract::() { Ok(py_catalog) => py_catalog.catalog, @@ -1071,10 +1071,7 @@ impl PySessionContext { "Catalog with name {name} doesn't exist." )))?; - match catalog - .as_any() - .downcast_ref::() - { + match catalog.downcast_ref::() { Some(wrapped_schema) => Ok(wrapped_schema.catalog_provider.clone_ref(py)), None => { Ok(PyCatalog::new_from_parts(catalog, self.ffi_logical_codec()).into_py_any(py)?) 
diff --git a/crates/core/src/dataframe.rs b/crates/core/src/dataframe.rs index 2e74991b8..8f1a20d0d 100644 --- a/crates/core/src/dataframe.rs +++ b/crates/core/src/dataframe.rs @@ -578,6 +578,11 @@ impl PyDataFrame { Ok(PyTable::from(table_provider)) } + fn alias(&self, alias: &str) -> PyDataFusionResult { + let df = self.df.as_ref().clone().alias(alias)?; + Ok(Self::new(df)) + } + #[pyo3(signature = (*args))] fn select_exprs(&self, args: Vec) -> PyDataFusionResult { let args = args.iter().map(|s| s.as_ref()).collect::>(); diff --git a/crates/core/src/dataset.rs b/crates/core/src/dataset.rs index dbeafcd9f..2a5770338 100644 --- a/crates/core/src/dataset.rs +++ b/crates/core/src/dataset.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; use std::sync::Arc; use async_trait::async_trait; @@ -62,12 +61,6 @@ impl Dataset { #[async_trait] impl TableProvider for Dataset { - /// Returns the table provider as [`Any`](std::any::Any) so that it can be - /// downcast to a specific implementation. - fn as_any(&self) -> &dyn Any { - self - } - /// Get a reference to the schema for this table fn schema(&self) -> SchemaRef { Python::attach(|py| { diff --git a/crates/core/src/dataset_exec.rs b/crates/core/src/dataset_exec.rs index a7dd1500d..771119a0f 100644 --- a/crates/core/src/dataset_exec.rs +++ b/crates/core/src/dataset_exec.rs @@ -15,18 +15,18 @@ // specific language governing permissions and limitations // under the License. 
-use std::any::Any; use std::sync::Arc; use datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::error::{ArrowError, Result as ArrowResult}; use datafusion::arrow::pyarrow::PyArrowType; use datafusion::arrow::record_batch::RecordBatch; +use datafusion::common::tree_node::TreeNodeRecursion; use datafusion::error::{DataFusionError as InnerDataFusionError, Result as DFResult}; use datafusion::execution::context::TaskContext; use datafusion::logical_expr::Expr; use datafusion::logical_expr::utils::conjunction; -use datafusion::physical_expr::{EquivalenceProperties, LexOrdering}; +use datafusion::physical_expr::{EquivalenceProperties, LexOrdering, PhysicalExpr}; use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ @@ -156,11 +156,6 @@ impl ExecutionPlan for DatasetExec { Self::static_name() } - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - /// Get the schema for this execution plan fn schema(&self) -> SchemaRef { self.schema.clone() @@ -235,8 +230,15 @@ impl ExecutionPlan for DatasetExec { }) } - fn partition_statistics(&self, _partition: Option) -> DFResult { - Ok(self.projected_statistics.clone()) + fn partition_statistics(&self, _partition: Option) -> DFResult> { + Ok(Arc::new(self.projected_statistics.clone())) + } + + fn apply_expressions( + &self, + _f: &mut dyn FnMut(&dyn PhysicalExpr) -> DFResult, + ) -> DFResult { + Ok(TreeNodeRecursion::Continue) } fn properties(&self) -> &Arc { diff --git a/crates/core/src/expr.rs b/crates/core/src/expr.rs index 2e633baeb..eac571a11 100644 --- a/crates/core/src/expr.rs +++ b/crates/core/src/expr.rs @@ -23,8 +23,8 @@ use datafusion::arrow::datatypes::{DataType, Field}; use datafusion::arrow::pyarrow::PyArrowType; use datafusion::functions::core::expr_ext::FieldAccessor; use datafusion::logical_expr::expr::{ - AggregateFunction, 
AggregateFunctionParams, FieldMetadata, InList, InSubquery, ScalarFunction, - SetComparison, WindowFunction, + AggregateFunction, AggregateFunctionParams, FieldMetadata, HigherOrderFunction, InList, + InSubquery, Lambda, ScalarFunction, SetComparison, WindowFunction, }; use datafusion::logical_expr::utils::exprlist_to_fields; use datafusion::logical_expr::{ @@ -91,9 +91,12 @@ pub mod explain; pub mod extension; pub mod filter; pub mod grouping_set; +pub mod higher_order_function; pub mod in_list; pub mod in_subquery; pub mod join; +pub mod lambda; +pub mod lambda_variable; pub mod like; pub mod limit; pub mod literal; @@ -226,6 +229,14 @@ impl PyExpr { Expr::SetComparison(value) => { Ok(set_comparison::PySetComparison::from(value.clone()).into_bound_py_any(py)?) } + Expr::HigherOrderFunction(value) => Ok( + higher_order_function::PyHigherOrderFunction::from(value.clone()) + .into_bound_py_any(py)?, + ), + Expr::Lambda(value) => Ok(lambda::PyLambda::from(value.clone()).into_bound_py_any(py)?), + Expr::LambdaVariable(value) => { + Ok(lambda_variable::PyLambdaVariable::from(value.clone()).into_bound_py_any(py)?) + } }) } @@ -393,7 +404,10 @@ impl PyExpr { | Expr::OuterReferenceColumn(_, _) | Expr::Unnest(_) | Expr::IsNotUnknown(_) - | Expr::SetComparison(_) => RexType::Call, + | Expr::SetComparison(_) + | Expr::HigherOrderFunction(..) + | Expr::Lambda(..) => RexType::Call, + Expr::LambdaVariable(..) => RexType::Reference, Expr::ScalarSubquery(..) => RexType::ScalarSubquery, #[allow(deprecated)] Expr::Wildcard { .. } => { @@ -425,9 +439,10 @@ impl PyExpr { pub fn rex_call_operands(&self) -> PyResult> { match &self.expr { // Expr variants that are themselves the operand to return - Expr::Column(..) | Expr::ScalarVariable(..) | Expr::Literal(..) => { - Ok(vec![PyExpr::from(self.expr.clone())]) - } + Expr::Column(..) + | Expr::ScalarVariable(..) + | Expr::Literal(..) + | Expr::LambdaVariable(..) 
=> Ok(vec![PyExpr::from(self.expr.clone())]), Expr::Alias(alias) => Ok(vec![PyExpr::from(*alias.expr.clone())]), @@ -454,13 +469,15 @@ impl PyExpr { params: AggregateFunctionParams { args, .. }, .. }) - | Expr::ScalarFunction(ScalarFunction { args, .. }) => { + | Expr::ScalarFunction(ScalarFunction { args, .. }) + | Expr::HigherOrderFunction(HigherOrderFunction { args, .. }) => { Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()) } Expr::WindowFunction(boxed_window_fn) => { let args = &boxed_window_fn.params.args; Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()) } + Expr::Lambda(Lambda { body, .. }) => Ok(vec![PyExpr::from(*body.clone())]), // Expr(s) that require more specific processing Expr::Case(Case { @@ -550,6 +567,10 @@ impl PyExpr { right: _, }) => format!("{op}"), Expr::ScalarFunction(ScalarFunction { func, args: _ }) => func.name().to_string(), + Expr::HigherOrderFunction(HigherOrderFunction { func, args: _ }) => { + func.name().to_string() + } + Expr::Lambda(..) => "lambda".to_string(), Expr::Cast { .. } => "cast".to_string(), Expr::Between { .. } => "between".to_string(), Expr::Case { .. 
} => "case".to_string(), @@ -837,7 +858,9 @@ impl PyExpr { | Operator::QuestionPipe | Operator::Colon => Err(py_type_err(format!("Unsupported expr: ${op}"))), }, - Expr::Cast(Cast { expr: _, data_type }) => DataTypeMap::map_from_arrow_type(data_type), + Expr::Cast(Cast { expr: _, field }) => { + DataTypeMap::map_from_arrow_type(field.data_type()) + } Expr::Literal(scalar_value, _) => DataTypeMap::map_from_scalar_value(scalar_value), _ => Err(py_type_err(format!( "Non Expr::Literal encountered in types: {expr:?}" @@ -893,6 +916,9 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/crates/core/src/expr/cast.rs b/crates/core/src/expr/cast.rs index 37d603538..484d0c059 100644 --- a/crates/core/src/expr/cast.rs +++ b/crates/core/src/expr/cast.rs @@ -52,7 +52,7 @@ impl PyCast { } fn data_type(&self) -> PyResult { - Ok(self.cast.data_type.clone().into()) + Ok(self.cast.field.data_type().clone().into()) } } @@ -81,6 +81,6 @@ impl PyTryCast { } fn data_type(&self) -> PyResult { - Ok(self.try_cast.data_type.clone().into()) + Ok(self.try_cast.field.data_type().clone().into()) } } diff --git a/crates/core/src/expr/higher_order_function.rs b/crates/core/src/expr/higher_order_function.rs new file mode 100644 index 000000000..91a94de2c --- /dev/null +++ b/crates/core/src/expr/higher_order_function.rs @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::fmt::{self, Display, Formatter}; + +use datafusion::logical_expr::expr::HigherOrderFunction; +use pyo3::prelude::*; + +use super::PyExpr; + +#[pyclass( + from_py_object, + frozen, + name = "HigherOrderFunction", + module = "datafusion.expr", + subclass +)] +#[derive(Clone)] +pub struct PyHigherOrderFunction { + higher_order: HigherOrderFunction, +} + +impl From for PyHigherOrderFunction { + fn from(higher_order: HigherOrderFunction) -> PyHigherOrderFunction { + PyHigherOrderFunction { higher_order } + } +} + +impl From for HigherOrderFunction { + fn from(higher_order: PyHigherOrderFunction) -> Self { + higher_order.higher_order + } +} + +impl Display for PyHigherOrderFunction { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "HigherOrderFunction(name={}, args={:?})", + self.higher_order.name(), + &self.higher_order.args, + ) + } +} + +#[pymethods] +impl PyHigherOrderFunction { + /// Name of the higher-order function being invoked. + fn name(&self) -> String { + self.higher_order.name().to_string() + } + + /// Arguments passed to the higher-order function. Some entries may be + /// `Lambda` expressions; others are ordinary value expressions. 
+ fn args(&self) -> Vec { + self.higher_order + .args + .iter() + .map(|e| PyExpr::from(e.clone())) + .collect() + } + + fn __repr__(&self) -> PyResult { + Ok(format!("HigherOrderFunction({self})")) + } + + fn __name__(&self) -> PyResult { + Ok("HigherOrderFunction".to_string()) + } +} diff --git a/crates/core/src/expr/lambda.rs b/crates/core/src/expr/lambda.rs new file mode 100644 index 000000000..3ebc6e61c --- /dev/null +++ b/crates/core/src/expr/lambda.rs @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::fmt::{self, Display, Formatter}; + +use datafusion::logical_expr::expr::Lambda; +use pyo3::prelude::*; + +use super::PyExpr; + +#[pyclass( + from_py_object, + frozen, + name = "Lambda", + module = "datafusion.expr", + subclass +)] +#[derive(Clone)] +pub struct PyLambda { + lambda: Lambda, +} + +impl From for PyLambda { + fn from(lambda: Lambda) -> PyLambda { + PyLambda { lambda } + } +} + +impl From for Lambda { + fn from(lambda: PyLambda) -> Self { + lambda.lambda + } +} + +impl Display for PyLambda { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "Lambda(params={:?}, body={:?})", + &self.lambda.params, &self.lambda.body, + ) + } +} + +#[pymethods] +impl PyLambda { + /// Parameter names of the lambda. + fn params(&self) -> Vec { + self.lambda.params.clone() + } + + /// Body expression of the lambda. + fn body(&self) -> PyExpr { + (*self.lambda.body).clone().into() + } + + fn __repr__(&self) -> PyResult { + Ok(format!("Lambda({self})")) + } + + fn __name__(&self) -> PyResult { + Ok("Lambda".to_string()) + } +} diff --git a/crates/core/src/expr/lambda_variable.rs b/crates/core/src/expr/lambda_variable.rs new file mode 100644 index 000000000..2ef554e17 --- /dev/null +++ b/crates/core/src/expr/lambda_variable.rs @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::fmt::{self, Display, Formatter}; + +use datafusion::logical_expr::expr::LambdaVariable; +use pyo3::prelude::*; + +#[pyclass( + from_py_object, + frozen, + name = "LambdaVariable", + module = "datafusion.expr", + subclass +)] +#[derive(Clone)] +pub struct PyLambdaVariable { + variable: LambdaVariable, +} + +impl From for PyLambdaVariable { + fn from(variable: LambdaVariable) -> PyLambdaVariable { + PyLambdaVariable { variable } + } +} + +impl From for LambdaVariable { + fn from(variable: PyLambdaVariable) -> Self { + variable.variable + } +} + +impl Display for PyLambdaVariable { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "LambdaVariable({})", &self.variable.name) + } +} + +#[pymethods] +impl PyLambdaVariable { + /// Reference name of the lambda parameter. + fn name(&self) -> String { + self.variable.name.clone() + } + + fn __repr__(&self) -> PyResult { + Ok(format!("LambdaVariable({self})")) + } + + fn __name__(&self) -> PyResult { + Ok("LambdaVariable".to_string()) + } +} diff --git a/crates/core/src/table.rs b/crates/core/src/table.rs index 623349771..e0f0f0d13 100644 --- a/crates/core/src/table.rs +++ b/crates/core/src/table.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; use std::sync::Arc; use arrow::datatypes::SchemaRef; @@ -150,10 +149,6 @@ impl TempViewTable { #[async_trait] impl TableProvider for TempViewTable { - fn as_any(&self) -> &dyn Any { - self - } - fn schema(&self) -> SchemaRef { Arc::new(self.df.schema().as_arrow().clone()) } diff --git a/crates/core/src/udf.rs b/crates/core/src/udf.rs index c0a39cb47..d48bc729c 100644 --- a/crates/core/src/udf.rs +++ b/crates/core/src/udf.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. 
-use std::any::Any; use std::hash::{Hash, Hasher}; use std::ptr::NonNull; use std::sync::Arc; @@ -94,10 +93,6 @@ impl Hash for PythonFunctionScalarUDF { } impl ScalarUDFImpl for PythonFunctionScalarUDF { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { &self.name } diff --git a/crates/core/src/udtf.rs b/crates/core/src/udtf.rs index 9371732dc..b3de25e52 100644 --- a/crates/core/src/udtf.rs +++ b/crates/core/src/udtf.rs @@ -18,7 +18,7 @@ use std::ptr::NonNull; use std::sync::Arc; -use datafusion::catalog::{TableFunctionImpl, TableProvider}; +use datafusion::catalog::{TableFunctionArgs, TableFunctionImpl, TableProvider}; use datafusion::error::Result as DataFusionResult; use datafusion::logical_expr::Expr; use datafusion_ffi::udtf::FFI_TableFunction; @@ -93,7 +93,11 @@ impl PyTableFunction { #[pyo3(signature = (*args))] pub fn __call__(&self, args: Vec) -> PyResult { let args: Vec = args.iter().map(|e| e.expr.clone()).collect(); - let table_provider = self.call(&args).map_err(py_datafusion_err)?; + let global = PySessionContext::global_ctx()?; + let state = global.ctx.state(); + let table_provider = self + .call_with_args(TableFunctionArgs::new(&args, &state)) + .map_err(py_datafusion_err)?; Ok(PyTable::from(table_provider)) } @@ -125,10 +129,12 @@ fn call_python_table_function( } impl TableFunctionImpl for PyTableFunction { - fn call(&self, args: &[Expr]) -> DataFusionResult> { + fn call_with_args(&self, args: TableFunctionArgs) -> DataFusionResult> { match &self.inner { - PyTableFunctionInner::FFIFunction(func) => func.call(args), - PyTableFunctionInner::PythonFunction(obj) => call_python_table_function(obj, args), + PyTableFunctionInner::FFIFunction(func) => func.call_with_args(args), + PyTableFunctionInner::PythonFunction(obj) => { + call_python_table_function(obj, args.exprs()) + } } } } diff --git a/crates/core/src/udwf.rs b/crates/core/src/udwf.rs index 1d3608ada..40e6208c4 100644 --- a/crates/core/src/udwf.rs +++ 
b/crates/core/src/udwf.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; use std::ops::Range; use std::ptr::NonNull; use std::sync::Arc; @@ -317,10 +316,6 @@ impl MultiColumnWindowUDF { } impl WindowUDFImpl for MultiColumnWindowUDF { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { &self.name } diff --git a/examples/datafusion-ffi-example/python/tests/_test_schema_provider.py b/examples/datafusion-ffi-example/python/tests/_test_schema_provider.py index 93449c660..c4a94348d 100644 --- a/examples/datafusion-ffi-example/python/tests/_test_schema_provider.py +++ b/examples/datafusion-ffi-example/python/tests/_test_schema_provider.py @@ -63,15 +63,18 @@ def test_schema_provider_extract_values(inner_capsule: bool) -> None: result = ctx.table(f"{expected_schema_name}.{expected_table_name}").collect() assert len(result) == 2 + # Multi-partition collect order is non-deterministic; sort batches by + # first value of col0 so col0 and col1 stay aligned. 
+ result = sorted(result, key=lambda r: r.column(0)[0].as_py()) col0_result = [r.column(0) for r in result] col1_result = [r.column(1) for r in result] expected_col0 = [ - pa.array([10, 20, 30], type=pa.int32()), pa.array([5, 7], type=pa.int32()), + pa.array([10, 20, 30], type=pa.int32()), ] expected_col1 = [ - pa.array([1, 2, 5], type=pa.float64()), pa.array([1.5, 2.5], type=pa.float64()), + pa.array([1, 2, 5], type=pa.float64()), ] assert col0_result == expected_col0 assert col1_result == expected_col1 diff --git a/examples/datafusion-ffi-example/python/tests/_test_table_function.py b/examples/datafusion-ffi-example/python/tests/_test_table_function.py index bf5aae3bd..de4662dd7 100644 --- a/examples/datafusion-ffi-example/python/tests/_test_table_function.py +++ b/examples/datafusion-ffi-example/python/tests/_test_table_function.py @@ -39,7 +39,8 @@ def test_ffi_table_function_register() -> None: assert result[0].num_columns == 4 print(result) - result = [r.column(0) for r in result] + # Multi-partition collect order is non-deterministic; sort by first value. 
+ result = sorted((r.column(0) for r in result), key=lambda a: a[0].as_py()) expected = [ pa.array([0, 1, 2], type=pa.int32()), pa.array([3, 4, 5, 6], type=pa.int32()), @@ -61,7 +62,7 @@ def test_ffi_table_function_call_directly(): assert result[0].num_columns == 4 print(result) - result = [r.column(0) for r in result] + result = sorted((r.column(0) for r in result), key=lambda a: a[0].as_py()) expected = [ pa.array([0, 1, 2], type=pa.int32()), pa.array([3, 4, 5, 6], type=pa.int32()), @@ -96,7 +97,7 @@ def common_table_function_test(test_ctx: SessionContext) -> None: assert result[0].num_columns == 3 print(result) - result = [r.column(0) for r in result] + result = sorted((r.column(0) for r in result), key=lambda a: a[0].as_py()) expected = [ pa.array([0, 1], type=pa.int32()), pa.array([2, 3, 4], type=pa.int32()), diff --git a/examples/datafusion-ffi-example/python/tests/_test_table_provider.py b/examples/datafusion-ffi-example/python/tests/_test_table_provider.py index fc77d2d3b..aee16f839 100644 --- a/examples/datafusion-ffi-example/python/tests/_test_table_provider.py +++ b/examples/datafusion-ffi-example/python/tests/_test_table_provider.py @@ -36,7 +36,9 @@ def test_table_provider_ffi(inner_capsule: bool) -> None: assert len(result) == 4 assert result[0].num_columns == 3 - result = [r.column(0) for r in result] + # Multi-partition collect order is non-deterministic; sort by first value + # in column 0, which is unique per partition (0, 2, 4, 6). 
+ result = sorted((r.column(0) for r in result), key=lambda a: a[0].as_py()) expected = [ pa.array([0, 1], type=pa.int32()), pa.array([2, 3, 4], type=pa.int32()), @@ -47,5 +49,5 @@ def test_table_provider_ffi(inner_capsule: bool) -> None: assert result == expected result = ctx.read_table(table).collect() - result = [r.column(0) for r in result] + result = sorted((r.column(0) for r in result), key=lambda a: a[0].as_py()) assert result == expected diff --git a/examples/datafusion-ffi-example/src/aggregate_udf.rs b/examples/datafusion-ffi-example/src/aggregate_udf.rs index d5343ff91..86737778f 100644 --- a/examples/datafusion-ffi-example/src/aggregate_udf.rs +++ b/examples/datafusion-ffi-example/src/aggregate_udf.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; use std::sync::Arc; use arrow_schema::DataType; @@ -61,10 +60,6 @@ impl MySumUDF { } impl AggregateUDFImpl for MySumUDF { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "my_custom_sum" } diff --git a/examples/datafusion-ffi-example/src/catalog_provider.rs b/examples/datafusion-ffi-example/src/catalog_provider.rs index bd5da1e4d..6131ab0f0 100644 --- a/examples/datafusion-ffi-example/src/catalog_provider.rs +++ b/examples/datafusion-ffi-example/src/catalog_provider.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. 
-use std::any::Any; use std::fmt::Debug; use std::sync::Arc; @@ -106,10 +105,6 @@ impl FixedSchemaProvider { #[async_trait] impl SchemaProvider for FixedSchemaProvider { - fn as_any(&self) -> &dyn Any { - self - } - fn table_names(&self) -> Vec { self.inner.table_names() } @@ -149,10 +144,6 @@ pub(crate) struct MyCatalogProvider { } impl CatalogProvider for MyCatalogProvider { - fn as_any(&self) -> &dyn Any { - self - } - fn schema_names(&self) -> Vec { self.inner.schema_names() } @@ -220,10 +211,6 @@ pub(crate) struct MyCatalogProviderList { } impl CatalogProviderList for MyCatalogProviderList { - fn as_any(&self) -> &dyn Any { - self - } - fn catalog_names(&self) -> Vec { self.inner.catalog_names() } diff --git a/examples/datafusion-ffi-example/src/scalar_udf.rs b/examples/datafusion-ffi-example/src/scalar_udf.rs index 374924781..a3c65e875 100644 --- a/examples/datafusion-ffi-example/src/scalar_udf.rs +++ b/examples/datafusion-ffi-example/src/scalar_udf.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; use std::sync::Arc; use arrow_array::{Array, BooleanArray}; @@ -61,10 +60,6 @@ impl IsNullUDF { } impl ScalarUDFImpl for IsNullUDF { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "my_custom_is_null" } diff --git a/examples/datafusion-ffi-example/src/window_udf.rs b/examples/datafusion-ffi-example/src/window_udf.rs index cbf179a86..f33a166ed 100644 --- a/examples/datafusion-ffi-example/src/window_udf.rs +++ b/examples/datafusion-ffi-example/src/window_udf.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. 
-use std::any::Any; use std::sync::Arc; use arrow_schema::{DataType, FieldRef}; @@ -56,10 +55,6 @@ impl MyRankUDF { } impl WindowUDFImpl for MyRankUDF { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "my_custom_rank" } diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index f08b464bb..601419fab 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -69,7 +69,12 @@ # The following imports are okay to remain as opaque to the user. from ._internal import Config -from .catalog import Catalog, Table +from .catalog import ( + Catalog, + Table, + TableProviderFactory, + TableProviderFactoryExportable, +) from .col import col, column from .common import DFSchema from .context import ( @@ -133,6 +138,8 @@ "SessionContext", "Table", "TableFunction", + "TableProviderFactory", + "TableProviderFactoryExportable", "WindowFrame", "WindowUDF", "catalog", diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 2b07861da..9ac8293d6 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -523,6 +523,32 @@ def select_exprs(self, *args: str) -> DataFrame: """ return self.df.select_exprs(*args) + def alias(self, alias: str) -> DataFrame: + """Assign a table alias to this :py:class:`DataFrame`. + + Replaces the qualifiers of the output columns with ``alias``. Useful for + self-joins and any situation that needs an unambiguous table-style + qualifier (``alias.col``) for downstream references. + + Args: + alias: Table alias to apply to the DataFrame's columns. + + Returns: + DataFrame with columns re-qualified under ``alias``. + + Example: + >>> from datafusion import col + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"id": [1, 2], "val": [10, 20]}) + >>> left = df.alias("l") + >>> right = df.alias("r") + >>> left.join(right, left_on="id", right_on="id").select( + ... "id", col("l.val").alias("lval"), col("r.val").alias("rval") + ... 
).sort("id").to_pydict() + {'id': [1, 2], 'lval': [10, 20], 'rval': [10, 20]} + """ + return DataFrame(self.df.alias(alias)) + def select(self, *exprs: Expr | str) -> DataFrame: """Project arbitrary expressions into a new :py:class:`DataFrame`. diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index e0135e3ed..55cc2e52a 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -150,6 +150,9 @@ TransactionStart = expr_internal.TransactionStart TryCast = expr_internal.TryCast Union = expr_internal.Union +HigherOrderFunction = expr_internal.HigherOrderFunction +Lambda = expr_internal.Lambda +LambdaVariable = expr_internal.LambdaVariable Unnest = expr_internal.Unnest UnnestExpr = expr_internal.UnnestExpr Values = expr_internal.Values @@ -193,6 +196,7 @@ "FileType", "Filter", "GroupingSet", + "HigherOrderFunction", "ILike", "InList", "InSubquery", @@ -207,6 +211,8 @@ "Join", "JoinConstraint", "JoinType", + "Lambda", + "LambdaVariable", "Like", "Limit", "Literal", diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 08062851a..9761d1879 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -184,6 +184,7 @@ "ifnull", "in_list", "initcap", + "instr", "isnan", "iszero", "lag", @@ -273,6 +274,7 @@ "percent_rank", "percentile_cont", "pi", + "position", "pow", "power", "quantile_cont", @@ -3874,7 +3876,7 @@ def arrays_zip(*arrays: Expr) -> Expr: >>> result = df.select( ... 
dfn.functions.arrays_zip(dfn.col("a"), dfn.col("b")).alias("result")) >>> result.collect_column("result")[0].as_py() - [{'c0': 1, 'c1': 3}, {'c0': 2, 'c1': 4}] + [{'1': 1, '2': 3}, {'1': 2, '2': 4}] """ args = [a.expr for a in arrays] return Expr(f.arrays_zip(args)) diff --git a/python/tests/test_aggregation.py b/python/tests/test_aggregation.py index 240332848..f5c54f756 100644 --- a/python/tests/test_aggregation.py +++ b/python/tests/test_aggregation.py @@ -125,18 +125,34 @@ def test_aggregation_stats(df, agg_expr, calc_expected): pa.array([1], type=pa.uint64()), False, ), - (f.approx_median(column("b")), pa.array([4]), False), - (f.median(column("b"), distinct=True), pa.array([5]), False), - (f.median(column("b"), filter=column("a") != 2), pa.array([5]), False), - (f.approx_median(column("b"), filter=column("a") != 2), pa.array([5]), False), - (f.approx_percentile_cont(column("b"), 0.5), pa.array([4]), False), + (f.approx_median(column("b")), pa.array([4], type=pa.float64()), False), + ( + f.median(column("b"), distinct=True), + pa.array([5], type=pa.float64()), + False, + ), + ( + f.median(column("b"), filter=column("a") != 2), + pa.array([5], type=pa.float64()), + False, + ), + ( + f.approx_median(column("b"), filter=column("a") != 2), + pa.array([5], type=pa.float64()), + False, + ), + ( + f.approx_percentile_cont(column("b"), 0.5), + pa.array([4], type=pa.float64()), + False, + ), ( f.approx_percentile_cont( column("b").sort(ascending=True, nulls_first=False), 0.5, num_centroids=2, ), - pa.array([4]), + pa.array([4.75], type=pa.float64()), False, ), ( @@ -212,19 +228,19 @@ def test_aggregation(df, agg_expr, expected, array_sort): ( "approx_percentile_cont", f.approx_percentile_cont(column("c3"), 0.95, num_centroids=200), - [73, 68, 122, 124, 115], + [73.55, 68.0, 122.5, 124.2, 115.6], ), ( "approx_perc_cont_few_centroids", f.approx_percentile_cont(column("c3"), 0.95, num_centroids=5), - [72, 68, 119, 124, 115], + [72.775, 68.0, 119.4075, 124.825, 115.44], ), 
( "approx_perc_cont_filtered", f.approx_percentile_cont( column("c3"), 0.95, num_centroids=200, filter=column("c3") > lit(0) ), - [83, 68, 122, 124, 117], + [83.0, 68.0, 122.75, 124.9, 117.6], ), ( "corr", diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index 6a466f6f2..cf1fe43e8 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -172,7 +172,10 @@ def test_relational_expr(test_ctx): assert df.filter(col("b") == "beta").count() == 1 assert df.filter(col("b") != "beta").count() == 2 - assert df.filter(col("a") == "beta").count() == 0 + # Upstream DataFusion now errors on string→Int64 implicit cast in filter + # (previously silently produced 0 matches). + with pytest.raises(Exception, match="Cannot cast string 'beta'"): + df.filter(col("a") == "beta").count() assert df.filter(col("a") == None).count() == 1 # noqa: E711 assert df.filter(col("a") != None).count() == 3 # noqa: E711 assert df.filter(col("b") == None).count() == 1 # noqa: E711 @@ -613,7 +616,7 @@ def test_alias_with_metadata(df): # pytest.param( col("c").reverse(), - pa.array(["olleH", " dlrow ", "!", None], type=pa.string()), + pa.array(["olleH", " dlrow ", "!", None], type=pa.string_view()), id="reverse", ), pytest.param( @@ -633,7 +636,7 @@ def test_alias_with_metadata(df): ), pytest.param( col("c").lower(), - pa.array(["hello", " world ", "!", None], type=pa.string()), + pa.array(["hello", " world ", "!", None], type=pa.string_view()), id="lower", ), pytest.param( @@ -767,7 +770,7 @@ def test_alias_with_metadata(df): ), pytest.param( col("c").upper(), - pa.array(["HELLO", " WORLD ", "!", None], type=pa.string()), + pa.array(["HELLO", " WORLD ", "!", None], type=pa.string_view()), id="upper", ), pytest.param( diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index d9781b1fb..5538fc33b 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -836,7 +836,7 @@ def test_map_functions(func, expected): 
(f.chr(literal(68)), pa.array(["D", "D", "D"])), ( f.concat_ws("-", column("a"), literal("test")), - pa.array(["Hello-test", "World-test", "!-test"]), + pa.array(["Hello-test", "World-test", "!-test"], type=pa.string_view()), ), ( f.concat(column("a").cast(pa.string()), literal("?")), @@ -851,7 +851,10 @@ def test_map_functions(func, expected): pa.array(["Hel", "Wor", "!"], type=pa.string_view()), ), (f.length(column("c")), pa.array([6, 7, 2], type=pa.int32())), - (f.lower(column("a")), pa.array(["hello", "world", "!"])), + ( + f.lower(column("a")), + pa.array(["hello", "world", "!"], type=pa.string_view()), + ), (f.lpad(column("a"), literal(7)), pa.array([" Hello", " World", " !"])), ( f.ltrim(column("c")), @@ -871,13 +874,16 @@ def test_map_functions(func, expected): (f.octet_length(column("a")), pa.array([5, 5, 1], type=pa.int32())), ( f.repeat(column("a"), literal(2)), - pa.array(["HelloHello", "WorldWorld", "!!"]), + pa.array(["HelloHello", "WorldWorld", "!!"], type=pa.string_view()), ), ( f.replace(column("a"), literal("l"), literal("?")), pa.array(["He??o", "Wor?d", "!"]), ), - (f.reverse(column("a")), pa.array(["olleH", "dlroW", "!"])), + ( + f.reverse(column("a")), + pa.array(["olleH", "dlroW", "!"], type=pa.string_view()), + ), ( f.right(column("a"), literal(4)), pa.array(["ello", "orld", "!"], type=pa.string_view()), @@ -892,7 +898,7 @@ def test_map_functions(func, expected): ), ( f.split_part(column("a"), literal("l"), literal(1)), - pa.array(["He", "Wor", "!"]), + pa.array(["He", "Wor", "!"], type=pa.string_view()), ), (f.contains(column("a"), literal("ell")), pa.array([True, False, False])), (f.starts_with(column("a"), literal("Wor")), pa.array([False, True, False])), @@ -903,14 +909,17 @@ def test_map_functions(func, expected): ), ( f.translate(column("a"), literal("or"), literal("ld")), - pa.array(["Helll", "Wldld", "!"]), + pa.array(["Helll", "Wldld", "!"], type=pa.string_view()), ), (f.trim(column("c")), pa.array(["hello", "world", "!"], 
type=pa.string_view())), - (f.upper(column("c")), pa.array(["HELLO ", " WORLD ", " !"])), + ( + f.upper(column("c")), + pa.array(["HELLO ", " WORLD ", " !"], type=pa.string_view()), + ), (f.ends_with(column("a"), literal("llo")), pa.array([True, False, False])), ( f.overlay(column("a"), literal("--"), literal(2)), - pa.array(["H--lo", "W--ld", "--"]), + pa.array(["H--lo", "W--ld", "!--"]), ), ( f.regexp_like(column("a"), literal("(ell|orl)")), @@ -2063,7 +2072,7 @@ def test_arrays_zip_aliases(func): df = ctx.from_pydict({"a": [[1, 2]], "b": [[3, 4]]}) result = df.select(func(column("a"), column("b")).alias("v")).collect() values = result[0].column(0)[0].as_py() - assert values == [{"c0": 1, "c1": 3}, {"c0": 2, "c1": 4}] + assert values == [{"1": 1, "2": 3}, {"1": 2, "2": 4}] @pytest.mark.parametrize("func", [f.string_to_array, f.string_to_list])