From 815f62c94b498ed4edcb200b6280edfb6f3669a0 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 16 Dec 2024 12:41:45 -0500 Subject: [PATCH] Update datafusion git tag to fix DenseUnion (#951) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ref https://github.com/apache/datafusion/pull/13797. This is now working for geometries! ``` SELECT ST_GeomFromText('LINESTRING(-71.160281 42.258729,-71.160837 42.259113,-71.161144 42.25932)'); ``` ``` ---- udf::native::io::wkt::test::test stdout ---- +----------------------------------------------------------------------------------------------------+ | st_geomfromtext(Utf8("LINESTRING(-71.160281 42.258729,-71.160837 42.259113,-71.161144 42.25932)")) | +----------------------------------------------------------------------------------------------------+ | {=[{x: -71.160281, y: 42.258729}, {x: -71.160837, y: 42.259113}, {x: -71.161144, y: 42.25932}]} | +----------------------------------------------------------------------------------------------------+ ``` 🚀 --- Cargo.lock | 45 ++++----- rust/geodatafusion/Cargo.toml | 2 +- rust/geodatafusion/src/udf/native/io/mod.rs | 1 - .../src/udf/native/io/union_example.rs | 95 ------------------- rust/geodatafusion/src/udf/native/io/wkt.rs | 14 +-- 5 files changed, 26 insertions(+), 131 deletions(-) delete mode 100644 rust/geodatafusion/src/udf/native/io/union_example.rs diff --git a/Cargo.lock b/Cargo.lock index c5761dd4..aea52987 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -955,7 +955,7 @@ dependencies = [ [[package]] name = "datafusion" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "arrow-array", @@ -1005,7 +1005,7 @@ dependencies = [ [[package]] name = 
"datafusion-catalog" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow-schema", "async-trait", @@ -1019,7 +1019,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "ahash", "arrow", @@ -1042,7 +1042,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "log", "tokio", @@ -1051,12 +1051,12 @@ dependencies = [ [[package]] name = "datafusion-doc" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" [[package]] name = "datafusion-execution" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "dashmap", @@ -1074,7 +1074,7 @@ dependencies = 
[ [[package]] name = "datafusion-expr" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "chrono", @@ -1094,7 +1094,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "datafusion-common", @@ -1104,7 +1104,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "arrow-buffer", @@ -1116,6 +1116,7 @@ dependencies = [ "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-macros", "hashbrown 0.14.5", "hex", @@ -1132,7 +1133,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "ahash", "arrow", @@ -1153,7 +1154,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "43.0.0" -source = 
"git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "ahash", "arrow", @@ -1165,7 +1166,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "arrow-array", @@ -1186,7 +1187,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "async-trait", @@ -1201,7 +1202,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1217,7 +1218,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = 
"git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1226,7 +1227,7 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "quote", "syn 2.0.79", @@ -1235,7 +1236,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "chrono", @@ -1253,7 +1254,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "ahash", "arrow", @@ -1277,7 +1278,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "ahash", "arrow", @@ -1290,7 +1291,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = 
"43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "datafusion-common", @@ -1306,7 +1307,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "ahash", "arrow", @@ -1337,7 +1338,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "arrow-array", diff --git a/rust/geodatafusion/Cargo.toml b/rust/geodatafusion/Cargo.toml index 2f99ee6b..ef471815 100644 --- a/rust/geodatafusion/Cargo.toml +++ b/rust/geodatafusion/Cargo.toml @@ -12,7 +12,7 @@ rust-version = "1.82" [dependencies] -datafusion = { git = "https://github.com/apache/datafusion", rev = "03e39da62e403e064d21b57e9d6c200464c03749" } +datafusion = { git = "https://github.com/kylebarron/datafusion", rev = "170432e3179ed72f413ffcd4d7edfe0007db296d" } arrow = { version = "53.3", features = ["ffi"] } arrow-array = { version = "53.3", features = ["chrono-tz"] } arrow-buffer = "53.3" diff --git a/rust/geodatafusion/src/udf/native/io/mod.rs b/rust/geodatafusion/src/udf/native/io/mod.rs index 63d63aad..ea85c047 100644 --- a/rust/geodatafusion/src/udf/native/io/mod.rs +++ 
b/rust/geodatafusion/src/udf/native/io/mod.rs @@ -1,7 +1,6 @@ //! Geometry Input and Output mod geohash; -mod union_example; mod wkb; mod wkt; diff --git a/rust/geodatafusion/src/udf/native/io/union_example.rs b/rust/geodatafusion/src/udf/native/io/union_example.rs deleted file mode 100644 index 982edc63..00000000 --- a/rust/geodatafusion/src/udf/native/io/union_example.rs +++ /dev/null @@ -1,95 +0,0 @@ -use std::any::Any; -use std::sync::Arc; - -use arrow::array::UnionBuilder; -use arrow::datatypes::{Float64Type, Int32Type}; -use arrow_array::Array; -use arrow_schema::{DataType, Field, UnionFields, UnionMode}; -use datafusion::logical_expr::{ - ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, -}; - -#[derive(Debug)] -pub struct UnionExample { - signature: Signature, -} - -impl UnionExample { - #[allow(dead_code)] - pub fn new() -> Self { - Self { - signature: Signature::nullary(Volatility::Immutable), - } - } -} - -impl ScalarUDFImpl for UnionExample { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "example_union" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result<DataType> { - let fields = UnionFields::new( - vec![0, 1], - vec![ - Arc::new(Field::new("a", DataType::Int32, false)), - Arc::new(Field::new("b", DataType::Float64, false)), - ], - ); - Ok(DataType::Union(fields, UnionMode::Dense)) - } - - fn invoke_no_args(&self, _number_rows: usize) -> datafusion::error::Result<ColumnarValue> { - let mut builder = UnionBuilder::new_dense(); - builder.append::<Int32Type>("a", 1).unwrap(); - builder.append::<Float64Type>("b", 3.0).unwrap(); - builder.append::<Int32Type>("a", 4).unwrap(); - let arr = builder.build().unwrap(); - - assert_eq!(arr.type_id(0), 0); - assert_eq!(arr.type_id(1), 1); - assert_eq!(arr.type_id(2), 0); - - assert_eq!(arr.value_offset(0), 0); - assert_eq!(arr.value_offset(1), 0); - assert_eq!(arr.value_offset(2), 1); - - let arr = arr.slice(0, 1); - - assert!(matches!(
- arr.data_type(), - DataType::Union(_, UnionMode::Dense) - )); - - Ok(ColumnarValue::Array(Arc::new(arr))) - } - - fn documentation(&self) -> Option<&Documentation> { - None - } -} - -#[cfg(test)] -mod test { - use super::*; - use datafusion::prelude::*; - - #[tokio::test] - async fn test() { - let ctx = SessionContext::new(); - ctx.register_udf(UnionExample::new().into()); - - let out = ctx.sql("SELECT example_union();").await.unwrap(); - // TODO: fix this error upstream - // https://github.com/apache/datafusion/issues/13762 - out.show().await.unwrap_err(); - } -} diff --git a/rust/geodatafusion/src/udf/native/io/wkt.rs b/rust/geodatafusion/src/udf/native/io/wkt.rs index 872afacd..d3d23c21 100644 --- a/rust/geodatafusion/src/udf/native/io/wkt.rs +++ b/rust/geodatafusion/src/udf/native/io/wkt.rs @@ -2,7 +2,6 @@ use std::any::Any; use std::sync::OnceLock; use arrow::array::AsArray; -use arrow_array::Array; use arrow_schema::DataType; use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; use datafusion::logical_expr::{ @@ -131,14 +130,7 @@ fn geom_from_text_impl(args: &[ColumnarValue]) -> GeoDataFusionResult().clone(), Default::default()); let native_arr = read_wkt(&wkt_arr, CoordType::Separated, false)?; - dbg!("native_arr"); - - let arrow_arr = native_arr.to_array_ref(); - if let DataType::Union(_fields, mode) = arrow_arr.data_type() { - dbg!(mode); - } - - Ok(arrow_arr.into()) + Ok(native_arr.to_array_ref().into()) } #[cfg(test)] @@ -153,8 +145,6 @@ mod test { register_native(&ctx); let out = ctx.sql("SELECT ST_GeomFromText('LINESTRING(-71.160281 42.258729,-71.160837 42.259113,-71.161144 42.25932)');").await.unwrap(); - // TODO: fix this error upstream - // https://github.com/apache/datafusion/issues/13762 - out.show().await.unwrap_err(); + out.show().await.unwrap(); } }