From 0ab9dc00918cb914067bda3b7251e7ca49047aee Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 13 Aug 2024 09:37:55 -0600 Subject: [PATCH 01/15] fix: remove leftover dbg! --- src/array/coord/separated/array.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/array/coord/separated/array.rs b/src/array/coord/separated/array.rs index 7d76b610..c1980033 100644 --- a/src/array/coord/separated/array.rs +++ b/src/array/coord/separated/array.rs @@ -20,7 +20,6 @@ pub struct SeparatedCoordBuffer { } fn check(buffers: &[ScalarBuffer; D]) -> Result<()> { - dbg!(buffers); if !buffers.windows(2).all(|w| w[0].len() == w[1].len()) { return Err(GeoArrowError::General( "all buffers must have the same length".to_string(), From 54cd466f18dc52ee3e39647960b5b7f597d55b98 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 13 Aug 2024 11:31:27 -0600 Subject: [PATCH 02/15] fix!: flip argument order for from_arrow_and_geometry --- src/table/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table/mod.rs b/src/table/mod.rs index 18068eb2..2f9bef1d 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -61,8 +61,8 @@ impl Table { } pub fn from_arrow_and_geometry( - batches: Vec, schema: SchemaRef, + batches: Vec, geometry: Arc, ) -> Result { if batches.is_empty() { From d26cbdabb16bcf7b6e3af0fa01283ddd80aee1ea Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 13 Aug 2024 12:08:42 -0600 Subject: [PATCH 03/15] docs: add a bunch of documentation We're not yet complete, but it's a start. 
--- Cargo.toml | 1 + README.md | 9 +- js/README.md | 7 +- python/README.md | 4 +- src/algorithm/mod.rs | 2 + src/array/coord/mod.rs | 3 + src/array/metadata.rs | 1 + src/array/mod.rs | 2 + src/{chunked_array => }/chunked_array.rs | 471 ++++++++++++++++- src/chunked_array/mod.rs | 19 - src/datatypes.rs | 99 +++- src/error.rs | 17 + src/geo_traits/mod.rs | 2 + src/indexed/mod.rs | 6 +- src/io/mod.rs | 2 + src/lib.rs | 54 +- src/scalar/mod.rs | 2 + src/{table/mod.rs => table.rs} | 261 +++++++++- src/trait_.rs | 610 +++++++++++++++++++++-- 19 files changed, 1462 insertions(+), 110 deletions(-) rename src/{chunked_array => }/chunked_array.rs (51%) delete mode 100644 src/chunked_array/mod.rs rename src/{table/mod.rs => table.rs} (64%) diff --git a/Cargo.toml b/Cargo.toml index 1ae271f6..a2af42ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -155,6 +155,7 @@ bench = false # TODO fix this benchmark required-features = ["parquet_compression"] [package.metadata.docs.rs] +rustdoc-args = ["--cfg", "docsrs"] features = [ "csv", "flatgeobuf", diff --git a/README.md b/README.md index 9ffc12d3..04c3f184 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,9 @@ -# `geoarrow-rs` +# geoarrow-rs + +[![GitHub Workflow Status (CI)](https://img.shields.io/github/actions/workflow/status/geoarrow/geoarrow-rs/ci.yml?branch=main&style=for-the-badge)](https://github.com/geoarrow/geoarrow-rs/actions/workflows/ci.yml) +[![docs.rs](https://img.shields.io/docsrs/geoarrow?style=for-the-badge&label=docs.rs)](https://docs.rs/geoarrow/latest/geoarrow/) +[![Crates.io](https://img.shields.io/crates/v/geoarrow?style=for-the-badge)](https://crates.io/crates/geoarrow) +![Crates.io](https://img.shields.io/crates/l/geoarrow?style=for-the-badge) A Rust implementation of the [GeoArrow](https://github.com/geoarrow/geoarrow) specification and bindings to [GeoRust algorithms](https://github.com/georust/geo) for efficient spatial operations on GeoArrow memory. 
@@ -20,7 +25,7 @@ This repository also includes [Python bindings](https://github.com/geoarrow/geoa Add this to your `Cargo.toml`: ```toml -geoarrow = "0.1" +geoarrow = "0.2" ``` ## References diff --git a/js/README.md b/js/README.md index aa499e7e..9644e32f 100644 --- a/js/README.md +++ b/js/README.md @@ -1,4 +1,6 @@ -# `geoarrow-wasm` +# geoarrow-wasm + +[![GitHub Workflow Status (WASM)](https://img.shields.io/github/actions/workflow/status/geoarrow/geoarrow-rs/wasm.yml?label=WASM&branch=main&style=for-the-badge)](https://github.com/geoarrow/geoarrow-rs/actions/workflows/wasm.yml) Efficient, vectorized geospatial operations in WebAssembly. @@ -21,7 +23,6 @@ I wrote a [blog post](https://kylebarron.dev/blog/geos-wasm) about this that goe Most users will use this by installing the prebuilt JavaScript package. This is published to NPM as [`geoarrow-wasm`](https://npmjs.com/package/geoarrow-wasm). - ### From Rust Advanced users can also depend on these Rust-Wasm bindings directly, enabling you to add custom operations on top of these bindings and generating your own WebAssembly bundles. This means you can reuse all the binding between JavaScript and WebAssembly and focus on implementing your algorithms. This package is published to crates.io as [`geoarrow-wasm`](https://crates.io/crates/geoarrow-wasm). 
@@ -31,5 +32,3 @@ Advanced users can also depend on these Rust-Wasm bindings directly, enabling yo - [Prototyping GeoRust + GeoArrow in WebAssembly](https://observablehq.com/@kylebarron/prototyping-georust-geoarrow-in-webassembly) ## How it Works - - diff --git a/python/README.md b/python/README.md index 6fdb3430..509650de 100644 --- a/python/README.md +++ b/python/README.md @@ -1,4 +1,6 @@ -# `geoarrow.rust`: Python bindings to `geoarrow-rs` +# geoarrow.rust + +[![GitHub Workflow Status (Python)](https://img.shields.io/github/actions/workflow/status/geoarrow/geoarrow-rs/python.yml?branch=main&style=for-the-badge)](https://github.com/geoarrow/geoarrow-rs/actions/workflows/python.yml) This folder contains Python bindings to the [GeoArrow Rust implementation](https://github.com/geoarrow/geoarrow-rs). diff --git a/src/algorithm/mod.rs b/src/algorithm/mod.rs index f170d362..f7acc616 100644 --- a/src/algorithm/mod.rs +++ b/src/algorithm/mod.rs @@ -1,5 +1,7 @@ //! Vectorized algorithms implemented on and returning GeoArrow arrays. +#![allow(missing_docs)] // FIXME + pub mod broadcasting; pub mod geo; pub mod geo_index; diff --git a/src/array/coord/mod.rs b/src/array/coord/mod.rs index 78cce7d8..5a96375d 100644 --- a/src/array/coord/mod.rs +++ b/src/array/coord/mod.rs @@ -19,7 +19,10 @@ pub use separated::{SeparatedCoordBuffer, SeparatedCoordBufferBuilder}; /// buffers as XXXX and YYYY. #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum CoordType { + /// Interleaved coordinates. #[default] Interleaved, + + /// Separated coordinates. Separated, } diff --git a/src/array/metadata.rs b/src/array/metadata.rs index 9136518a..44ffde63 100644 --- a/src/array/metadata.rs +++ b/src/array/metadata.rs @@ -12,6 +12,7 @@ use crate::error::GeoArrowError; /// this value is omitted, edges will be interpreted as planar. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub enum Edges { + /// Follow a spherical path rather than a planar one. 
#[serde(rename = "spherical")] Spherical, } diff --git a/src/array/mod.rs b/src/array/mod.rs index e87666a0..ed59ffd9 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -1,5 +1,7 @@ //! Implementations of immutable GeoArrow arrays plus builders to more easily create arrays. +#![allow(missing_docs)] // FIXME + pub use binary::{WKBArray, WKBBuilder, WKBCapacity}; pub use cast::{AsChunkedGeometryArray, AsGeometryArray}; pub use coord::{ diff --git a/src/chunked_array/chunked_array.rs b/src/chunked_array.rs similarity index 51% rename from src/chunked_array/chunked_array.rs rename to src/chunked_array.rs index 4c5d5722..71095d4d 100644 --- a/src/chunked_array/chunked_array.rs +++ b/src/chunked_array.rs @@ -1,3 +1,12 @@ +//! Contains implementations of _chunked_ GeoArrow arrays. +//! +//! In contrast to the structures in [array](crate::array), these data structures only have contiguous +//! memory within each individual _chunk_. These chunked arrays are essentially wrappers around a +//! [Vec] of geometry arrays. +//! +//! Additionally, if the `rayon` feature is active, operations on chunked arrays will automatically +//! be parallelized across each chunk. + use std::any::Any; use std::collections::HashSet; use std::sync::Arc; @@ -25,6 +34,18 @@ pub struct ChunkedArray { } impl ChunkedArray { + /// Creates a new chunked array from multiple arrays. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// let array_0 = Int32Array::from(vec![1, 2]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// ``` pub fn new(chunks: Vec) -> Self { let mut length = 0; chunks.iter().for_each(|x| length += x.len()); @@ -39,32 +60,130 @@ impl ChunkedArray { Self { chunks, length } } + /// Converts this chunked array into its inner chunks. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// let array_0 = Int32Array::from(vec![1, 2]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// let chunks = chunked_array.into_inner(); + /// assert_eq!(chunks.len(), 2); + /// ``` pub fn into_inner(self) -> Vec { self.chunks } + /// Returns this chunked array's length. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// let array_0 = Int32Array::from(vec![1, 2]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// assert_eq!(chunked_array.len(), 4); + /// ``` pub fn len(&self) -> usize { self.length } + /// Returns true if chunked array is empty. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// assert!(ChunkedArray::::new(Vec::new()).is_empty()); + /// + /// let array_0 = Int32Array::from(vec![1, 2]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// assert!(!chunked_array.is_empty()); + /// ``` pub fn is_empty(&self) -> bool { self.len() == 0 } + /// Returns this chunked array's data type. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// use arrow_schema::DataType; + /// + /// let array_0 = Int32Array::from(vec![1, 2]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// assert_eq!(chunked_array.data_type(), &DataType::Int32); + /// ``` pub fn data_type(&self) -> &DataType { self.chunks.first().unwrap().data_type() } + /// Returns the number of nulls in this chunked array. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// let array_0 = Int32Array::from(vec![1, 2]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// assert_eq!(chunked_array.null_count(), 0); + /// ``` pub fn null_count(&self) -> usize { self.chunks() .iter() .fold(0, |acc, chunk| acc + chunk.null_count()) } + /// Returns an immutable reference to this chunked array's chunks. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// let array_0 = Int32Array::from(vec![1, 2]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// let chunks = chunked_array.chunks(); + /// ``` pub fn chunks(&self) -> &[A] { self.chunks.as_slice() } + /// Applies an operation over each chunk of this chunked array. + /// + /// If the `rayon` feature is enabled, this will be done in parallel. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// let array_0 = Int32Array::from(vec![1]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// let lengths = chunked_array.map(|chunk| chunk.len()); + /// assert_eq!(lengths, vec![1, 2]); + /// ``` #[allow(dead_code)] pub fn map R + Sync + Send, R: Send>(&self, map_op: F) -> Vec { #[cfg(feature = "rayon")] @@ -82,7 +201,22 @@ impl ChunkedArray { self.chunks.iter().map(map_op).collect() } } - + /// Applies an operation over each chunk of this chunked array, returning a `Result`. + /// + /// If the `rayon` feature is enabled, this will be done in parallel. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// let array_0 = Int32Array::from(vec![1]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// let lengths = chunked_array.try_map(|chunk| Ok(chunk.len())).unwrap(); + /// assert_eq!(lengths, vec![1, 2]); + /// ``` pub fn try_map Result + Sync + Send, R: Send>( &self, map_op: F, @@ -118,9 +252,9 @@ impl AsRef<[A]> for ChunkedArray { /// This can be thought of as a geometry column in a table, as Table objects normally have internal /// batches. /// -/// ## Invariants: +/// # Invariants /// -/// - Must have at least one chunk +/// Must have at least one chunk. #[derive(Debug, Clone, PartialEq)] pub struct ChunkedGeometryArray { pub(crate) chunks: Vec, @@ -128,6 +262,17 @@ pub struct ChunkedGeometryArray { } impl ChunkedGeometryArray { + /// Creates a new chunked geometry array from multiple arrays. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// ``` pub fn new(chunks: Vec) -> Self { // TODO: assert all equal extension fields let mut length = 0; @@ -135,32 +280,125 @@ impl ChunkedGeometryArray { Self { chunks, length } } - // TODO: check/assert on creation that all are the same so we can be comfortable here only - // taking the first. + /// Returns the extension field for this chunked geometry array. + /// + /// TODO: check/assert on creation that all are the same so we can be comfortable here only + /// taking the first. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let field = chunked_array.extension_field(); + /// assert_eq!(field.name(), "geometry"); + /// ``` pub fn extension_field(&self) -> Arc { self.chunks.first().unwrap().extension_field() } + /// Converts this chunked geometry array into its inner chunks. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let chunks = chunked_array.into_inner(); + /// assert_eq!(chunks.len(), 2); + /// ``` pub fn into_inner(self) -> Vec { self.chunks } + /// Returns this chunked geometry array length. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.), &geo::point!(x: 5., y: 6.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// assert_eq!(chunked_array.len(), 3); + /// ``` pub fn len(&self) -> usize { self.length } + /// Returns true if this chunked geometry array is empty. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// assert!(!chunked_array.is_empty()); + /// ``` pub fn is_empty(&self) -> bool { self.len() == 0 } + /// Returns an immutable reference to this array's chunks. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let chunks = chunked_array.chunks(); + /// ``` pub fn chunks(&self) -> &[G] { self.chunks.as_slice() } + /// Returns this array's geo data type. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray, datatypes::GeoDataType}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// assert!(matches!(chunked_array.data_type(), GeoDataType::Point(_, _))); + /// ``` pub fn data_type(&self) -> GeoDataType { self.chunks.first().unwrap().data_type() } + /// Converts this chunked array into a vector, where each element is the output of `map_op` for one chunk. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::ChunkedGeometryArray, + /// array::PointArray, + /// trait_::GeometryArrayTrait, + /// datatypes::GeoDataType, + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let lengths = chunked_array.into_map(|chunk| chunk.len()); // chunked_array is consumed + /// assert_eq!(lengths, vec![1, 1]); + /// ``` pub fn into_map R + Sync + Send, R: Send>(self, map_op: F) -> Vec { #[cfg(feature = "rayon")] { @@ -178,6 +416,24 @@ impl ChunkedGeometryArray { } } + /// Maps this chunked array into a vector, where each element is the output of `map_op` for one chunk. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::ChunkedGeometryArray, + /// array::PointArray, + /// trait_::GeometryArrayTrait, + /// datatypes::GeoDataType, + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let lengths = chunked_array.map(|chunk| chunk.len()); + /// assert_eq!(lengths, vec![1, 1]); + /// ``` pub fn map R + Sync + Send, R: Send>(&self, map_op: F) -> Vec { #[cfg(feature = "rayon")] { @@ -195,6 +451,24 @@ impl ChunkedGeometryArray { } } + /// Maps this chunked array into a vector, where each element is the `Result` output of `map_op` for one chunk. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::ChunkedGeometryArray, + /// array::PointArray, + /// trait_::GeometryArrayTrait, + /// datatypes::GeoDataType, + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let lengths = chunked_array.try_map(|chunk| Ok(chunk.len())).unwrap(); + /// assert_eq!(lengths, vec![1, 1]); + /// ``` pub fn try_map Result + Sync + Send, R: Send>( &self, map_op: F, @@ -212,6 +486,22 @@ impl ChunkedGeometryArray { } impl<'a, G: GeometryArrayTrait + GeometryArrayAccessor<'a>> ChunkedGeometryArray { + /// Returns a value from this chunked array, ignoring validity. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let value = chunked_array.value(1); // geoarrow::scalar::Point<2> + /// ``` + /// + /// # Panics + /// + /// Panics if the index exceeds the size of this chunked array. pub fn value(&'a self, index: usize) -> G::Item { assert!(index <= self.len()); let mut index = index; @@ -225,6 +515,22 @@ impl<'a, G: GeometryArrayTrait + GeometryArrayAccessor<'a>> ChunkedGeometryArray unreachable!() } + /// Returns a value from this chunked array, considering validity. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let value = chunked_array.get(1).unwrap(); // geoarrow::scalar::Point<2> + /// ``` + /// + /// # Panics + /// + /// Panics if the index exceeds the size of this chunked array. pub fn get(&'a self, index: usize) -> Option { assert!(index <= self.len()); let mut index = index; @@ -247,20 +553,31 @@ impl TryFrom> for ChunkedGeometryArray { } } +/// A chunked point array. pub type ChunkedPointArray = ChunkedGeometryArray>; +/// A chunked line string array. pub type ChunkedLineStringArray = ChunkedGeometryArray>; +/// A chunked polygon array. pub type ChunkedPolygonArray = ChunkedGeometryArray>; +/// A chunked multi-point array. pub type ChunkedMultiPointArray = ChunkedGeometryArray>; +/// A chunked multi-line string array. pub type ChunkedMultiLineStringArray = ChunkedGeometryArray>; +/// A chunked multi-polygon array. pub type ChunkedMultiPolygonArray = ChunkedGeometryArray>; +/// A chunked mixed geometry array. pub type ChunkedMixedGeometryArray = ChunkedGeometryArray>; +/// A chunked geometry collection array. pub type ChunkedGeometryCollectionArray = ChunkedGeometryArray>; +/// A chunked WKB array. pub type ChunkedWKBArray = ChunkedGeometryArray>; +/// A chunked rect array. pub type ChunkedRectArray = ChunkedGeometryArray>; +/// A chunked unknown geometry array. 
#[allow(dead_code)] pub type ChunkedUnknownGeometryArray = ChunkedGeometryArray>; @@ -271,26 +588,128 @@ pub type ChunkedUnknownGeometryArray = ChunkedGeometryArray = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let any = chunked_array.as_any(); + /// ``` fn as_any(&self) -> &dyn Any; /// Returns a reference to the [`GeoDataType`] of this array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::{ChunkedGeometryArray, ChunkedGeometryArrayTrait}, + /// array::PointArray + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let data_type = chunked_array.data_type(); + /// ``` fn data_type(&self) -> GeoDataType; - /// Returns an Arrow [`Field`] describing this chunked array. This field will always have the - /// `ARROW:extension:name` key of the field metadata set, signifying that it describes a - /// GeoArrow extension type. + /// Returns an Arrow [`Field`] describing this chunked array. + /// + /// This field will always have the `ARROW:extension:name` key of the field + /// metadata set, signifying that it describes a GeoArrow extension type. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::{ChunkedGeometryArray, ChunkedGeometryArrayTrait}, + /// array::PointArray + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let field = chunked_array.extension_field(); + /// assert_eq!(field.metadata()["ARROW:extension:name"], "geoarrow.point"); + /// ``` fn extension_field(&self) -> Arc; - /// Access the geometry chunks contained within this chunked array. + /// Returns a vector of references to the geometry chunks contained within this chunked array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::{ChunkedGeometryArray, ChunkedGeometryArrayTrait}, + /// array::PointArray + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let chunks = chunked_array.geometry_chunks(); + /// assert_eq!(chunks.len(), 2); + /// ``` fn geometry_chunks(&self) -> Vec<&dyn GeometryArrayTrait>; - /// The number of chunks in this chunked array. + /// Returns the number of chunks in this chunked array. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::{ChunkedGeometryArray, ChunkedGeometryArrayTrait}, + /// array::PointArray + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// assert_eq!(chunked_array.num_chunks(), 2); + /// ``` fn num_chunks(&self) -> usize; + /// Returns a reference to this chunked geometry array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::{ChunkedGeometryArray, ChunkedGeometryArrayTrait}, + /// array::PointArray + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let array_ref = chunked_array.as_ref(); + /// ``` fn as_ref(&self) -> &dyn ChunkedGeometryArrayTrait; + /// Returns a vector of references to the underlying arrow arrays. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::{ChunkedGeometryArray, ChunkedGeometryArrayTrait}, + /// array::PointArray + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let arrays = chunked_array.array_refs(); + /// ``` fn array_refs(&self) -> Vec>; } @@ -446,8 +865,22 @@ impl ChunkedGeometryArrayTrait for ChunkedRectArray { } } -/// Construct +/// Constructs a chunked geometry array from arrow chunks. +/// /// Does **not** parse WKB. Will return a ChunkedWKBArray for WKB input. 
+/// +/// # Examples +/// +/// ``` +/// use geoarrow::{GeometryArrayTrait, array::PointArray}; +/// use std::sync::Arc; +/// +/// let array: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); +/// let field = array.extension_field(); +/// let array = array.into_array_ref(); +/// let chunks = vec![array.as_ref()]; +/// let chunked_array = geoarrow::chunked_array::from_arrow_chunks(chunks.as_slice(), &field).unwrap(); +/// ``` pub fn from_arrow_chunks( chunks: &[&dyn Array], field: &Field, @@ -515,6 +948,18 @@ pub fn from_arrow_chunks( } } +/// Creates a chunked geometry array from geoarrow chunks. +/// +/// # Examples +/// +/// ``` +/// use geoarrow::{GeometryArrayTrait, array::PointArray}; +/// +/// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); +/// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); +/// let chunks = vec![array_0.as_ref(), array_1.as_ref()]; +/// let chunked_array = geoarrow::chunked_array::from_geoarrow_chunks(chunks.as_slice()).unwrap(); +/// ``` pub fn from_geoarrow_chunks( chunks: &[&dyn GeometryArrayTrait], ) -> Result> { diff --git a/src/chunked_array/mod.rs b/src/chunked_array/mod.rs deleted file mode 100644 index 8100e2a4..00000000 --- a/src/chunked_array/mod.rs +++ /dev/null @@ -1,19 +0,0 @@ -//! Contains implementations of _chunked_ GeoArrow arrays. -//! -//! In contrast to the structures in [array](../array), these data structures only have contiguous -//! memory within each individual _chunk_. These chunked arrays are essentially wrappers around a -//! `Vec` of geometry arrays. -//! -//! Additionally, if the `rayon` feature is active, operations on chunked arrays will automatically -//! be parallelized across each chunk. 
- -#[allow(clippy::module_inception)] -mod chunked_array; - -pub use chunked_array::{ - from_arrow_chunks, from_geoarrow_chunks, ChunkedArray, ChunkedGeometryArray, - ChunkedGeometryArrayTrait, ChunkedGeometryCollectionArray, ChunkedLineStringArray, - ChunkedMixedGeometryArray, ChunkedMultiLineStringArray, ChunkedMultiPointArray, - ChunkedMultiPolygonArray, ChunkedPointArray, ChunkedPolygonArray, ChunkedRectArray, - ChunkedWKBArray, -}; diff --git a/src/datatypes.rs b/src/datatypes.rs index 5416570f..3f2701c0 100644 --- a/src/datatypes.rs +++ b/src/datatypes.rs @@ -11,14 +11,36 @@ use crate::array::metadata::ArrayMetadata; use crate::array::CoordType; use crate::error::{GeoArrowError, Result}; -/// The dimension of the geometry array +/// The dimension of the geometry array. +/// +/// [Dimension] implements [TryFrom] for integers: +/// +/// ``` +/// use geoarrow::datatypes::Dimension; +/// +/// assert_eq!(Dimension::try_from(2).unwrap(), Dimension::XY); +/// ``` #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Dimension { + /// Two-dimensional. XY, + + /// Three-dimensional. XYZ, } impl Dimension { + /// Returns the size of this dimension. + /// + /// # Examples + /// + /// + /// ``` + /// use geoarrow::datatypes::Dimension; + /// + /// assert_eq!(Dimension::XY.size(), 2); + /// assert_eq!(Dimension::XYZ.size(), 3); + /// ``` pub fn size(&self) -> usize { match self { Dimension::XY => 2, @@ -49,9 +71,9 @@ impl TryFrom for Dimension { } } -/// The geometry type is designed to aid in downcasting from dynamically-typed geometry arrays by -/// uniquely identifying the physical buffer layout of each geometry array type. +/// The geodata type is designed to aid in downcasting from dynamically-typed geometry arrays. /// +/// The geodata type uniquely identifies the physical buffer layout of each geometry array type. 
/// It must always be possible to accurately downcast from a `dyn &GeometryArrayTrait` or `dyn /// &ChunkedGeometryArrayTrait` to a unique concrete array type using this enum. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -331,10 +353,20 @@ fn rect_data_type(dim: Dimension) -> DataType { } impl GeoDataType { - /// Convert a [`GeoDataType`] into the relevant arrow [`DataType`]. + /// Converts a [`GeoDataType`] into the relevant arrow [`DataType`]. /// /// Note that an arrow [`DataType`] will lose the accompanying GeoArrow metadata if it is not /// part of a [`Field`] with GeoArrow extension metadata in its field metadata. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::CoordType, datatypes::{GeoDataType, Dimension}}; + /// use arrow_schema::DataType; + /// + /// let data_type = GeoDataType::Point(CoordType::Interleaved, Dimension::XY).to_data_type(); + /// assert!(matches!(data_type, DataType::FixedSizeList(_, _))); + /// ``` pub fn to_data_type(&self) -> DataType { use GeoDataType::*; match self { @@ -367,7 +399,16 @@ impl GeoDataType { } } - /// Get the GeoArrow extension name pertaining to this data type. + /// Returns the GeoArrow extension name pertaining to this data type. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::datatypes::GeoDataType; + /// + /// let geo_data_type = GeoDataType::Point(Default::default(), 2.try_into().unwrap()); + /// assert_eq!(geo_data_type.extension_name(), "geoarrow.point") + /// ``` pub fn extension_name(&self) -> &'static str { use GeoDataType::*; match self { @@ -386,8 +427,20 @@ impl GeoDataType { } } - /// Convert this [`GeoDataType`] into an arrow [`Field`], maintaining GeoArrow extension + /// Converts this [`GeoDataType`] into an arrow [`Field`], maintaining GeoArrow extension /// metadata. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::datatypes::GeoDataType; + /// + /// let geo_data_type = GeoDataType::Point(Default::default(), 2.try_into().unwrap()); + /// let field = geo_data_type.to_field("geometry", false); + /// assert_eq!(field.name(), "geometry"); + /// assert!(!field.is_nullable()); + /// assert_eq!(field.metadata()["ARROW:extension:name"], "geoarrow.point"); + /// ``` pub fn to_field>(&self, name: N, nullable: bool) -> Field { let extension_name = self.extension_name(); let mut metadata = HashMap::with_capacity(1); @@ -398,6 +451,20 @@ impl GeoDataType { Field::new(name, self.to_data_type(), nullable).with_metadata(metadata) } + /// Converts this geo-data type to a field with the additional [ArrayMetadata]. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::metadata::{ArrayMetadata, Edges}, datatypes::GeoDataType}; + /// + /// let geo_data_type = GeoDataType::Point(Default::default(), 2.try_into().unwrap()); + /// let metadata = ArrayMetadata { + /// crs: None, + /// edges: Some(Edges::Spherical), + /// }; + /// let field = geo_data_type.to_field_with_metadata("geometry", false, &metadata); + /// ``` pub fn to_field_with_metadata>( &self, name: N, @@ -417,6 +484,16 @@ impl GeoDataType { Field::new(name, self.to_data_type(), nullable).with_metadata(metadata) } + /// Returns this geodata type with the provided [CoordType]. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::CoordType, datatypes::GeoDataType}; + /// + /// let geo_data_type = GeoDataType::Point(CoordType::Interleaved, 2.try_into().unwrap()); + /// let separated_geo_data_type = geo_data_type.with_coord_type(CoordType::Separated); + /// ``` pub fn with_coord_type(self, coord_type: CoordType) -> GeoDataType { use GeoDataType::*; match self { @@ -441,6 +518,16 @@ impl GeoDataType { } } + /// Returns this geodata type with the provided [Dimension]. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::datatypes::GeoDataType; + /// + /// let geo_data_type = GeoDataType::Point(Default::default(), 2.try_into().unwrap()); + /// let geo_data_type_3d = geo_data_type.with_dimension(3.try_into().unwrap()); + /// ``` pub fn with_dimension(self, dim: Dimension) -> GeoDataType { use GeoDataType::*; match self { diff --git a/src/error.rs b/src/error.rs index 0a07483a..bc4cea89 100644 --- a/src/error.rs +++ b/src/error.rs @@ -9,6 +9,7 @@ use thiserror::Error; #[derive(Error, Debug)] #[non_exhaustive] pub enum GeoArrowError { + /// Incorrect type was passed to an operation. #[error("Incorrect type passed to operation: {0}")] IncorrectType(Cow<'static, str>), @@ -16,61 +17,77 @@ pub enum GeoArrowError { #[error("Not yet implemented: {0}")] NotYetImplemented(String), + /// General error. #[error("General error: {0}")] General(String), /// Whenever pushing to a container fails because it does not support more entries. + /// /// The solution is usually to use a higher-capacity container-backing type. 
#[error("Overflow")] Overflow, + /// [ArrowError] #[error(transparent)] Arrow(#[from] ArrowError), + /// [geo::vincenty_distance::FailedToConvergeError] #[error(transparent)] FailedToConvergeError(#[from] geo::vincenty_distance::FailedToConvergeError), + /// [gdal::errors::GdalError] #[cfg(feature = "gdal")] #[error(transparent)] GdalError(#[from] gdal::errors::GdalError), + /// [geozero::error::GeozeroError] #[cfg(feature = "geozero")] #[error(transparent)] GeozeroError(#[from] geozero::error::GeozeroError), + /// [geos::Error] #[cfg(feature = "geos")] #[error(transparent)] GeosError(#[from] geos::Error), + /// [object_store::Error] #[cfg(feature = "flatgeobuf_async")] #[error(transparent)] ObjectStoreError(#[from] object_store::Error), + /// [parquet::errors::ParquetError] #[cfg(feature = "parquet")] #[error(transparent)] ParquetError(#[from] parquet::errors::ParquetError), + /// [polylabel::errors::PolylabelError] #[cfg(feature = "polylabel")] #[error(transparent)] PolylabelError(#[from] polylabel::errors::PolylabelError), + /// [proj::ProjError] #[cfg(feature = "proj")] #[error(transparent)] ProjError(#[from] proj::ProjError), + /// [flatgeobuf::Error] #[cfg(feature = "flatgeobuf")] #[error(transparent)] FlatgeobufError(#[from] flatgeobuf::Error), + /// [std::io::Error] #[error(transparent)] IOError(#[from] std::io::Error), + /// [serde_json::Error] #[error(transparent)] SerdeJsonError(#[from] serde_json::Error), + /// [sqlx::Error] #[cfg(feature = "postgis")] #[error(transparent)] SqlxError(#[from] sqlx::Error), } +/// Crate-specific result type. pub type Result = std::result::Result; diff --git a/src/geo_traits/mod.rs b/src/geo_traits/mod.rs index d2105fab..1c8429a6 100644 --- a/src/geo_traits/mod.rs +++ b/src/geo_traits/mod.rs @@ -8,6 +8,8 @@ //! [here](https://github.com/georust/geo/pull/1019)) but that is vendored into this repository for //! use internally, such as in the WKB parser. 
+#![allow(missing_docs)] // FIXME + pub use coord::CoordTrait; pub use geometry::{GeometryTrait, GeometryType}; pub use geometry_collection::GeometryCollectionTrait; diff --git a/src/indexed/mod.rs b/src/indexed/mod.rs index 4c8170a4..0af9abcb 100644 --- a/src/indexed/mod.rs +++ b/src/indexed/mod.rs @@ -1,5 +1,7 @@ -//! Indexed geometry arrays: arrays which are associated with a spatial index for efficient boolean -//! operations. +//! Indexed geometry arrays are associated with a spatial index for efficient +//! boolean operations. + +#![allow(missing_docs)] // FIXME + pub mod array; pub mod chunked; diff --git a/src/io/mod.rs b/src/io/mod.rs index b89b15e8..deecc31e 100644 --- a/src/io/mod.rs +++ b/src/io/mod.rs @@ -1,6 +1,8 @@ //! Reader and writer implementations of many common geospatial file formats, including //! interoperability with the `geozero` crate. +#![allow(missing_docs)] // FIXME + #[cfg(feature = "csv")] pub mod csv; #[cfg(feature = "geozero")] diff --git a/src/lib.rs b/src/lib.rs index 7fcd9a61..7978d7fb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,60 @@ //! A Rust implementation of the [GeoArrow](https://github.com/geoarrow/geoarrow) specification, -//! plus algorithms implemented on and returning these GeoArrow arrays. +//! including algorithms implemented on and returning these GeoArrow arrays. +//! +//! # Reading and writing +//! +//! The [io] module has functions for reading and writing GeoArrow data from a variety of formats. +//! To use most format readers and writers, you must enable their corresponding feature. +//! For example, to convert between [GeoJSON](https://geojson.org/) and GeoArrow, enable the `geozero` feature in your `Cargo.toml`: +//! +//! ```toml +//! [dependencies] +//! geoarrow = { version = "*", features = ["geozero"] } +//! ``` +//! +//! Then: +//! +//! ``` +//! # #[cfg(feature = "geozero")] +//! # { +//! use std::{io::Cursor, fs::File}; +//! +//! // Reads geojson from a file into a GeoArrow table. +//!
let file = File::open("fixtures/roads.geojson").unwrap(); +//! let table = geoarrow::io::geojson::read_geojson(file, None).unwrap(); +//! +//! // Writes that table to a cursor as JSON, then reads it back into a `serde_json::Value`. +//! let mut cursor = Cursor::new(Vec::new()); +//! geoarrow::io::geojson::write_geojson(table, &mut cursor); +//! let value: serde_json::Value = serde_json::from_slice(&cursor.into_inner()).unwrap(); +//! # } +//! ``` +//! +//! See the [io] module for more information on the available formats and their features. +//! +//! # Constructing +//! +//! You can build GeoArrow arrays all at once from [mod@geo] structures: +//! +//! ``` +//! use geoarrow::array::PointArray; +//! +//! let point = geo::point!(x: 1., y: 2.); +//! let array: PointArray<2> = vec![point].as_slice().into(); +//! ``` +//! +//! Or you can use builders, e.g. [PointBuilder](crate::array::PointBuilder): +//! +//! ``` +//! use geoarrow::array::PointBuilder; +//! let mut builder = PointBuilder::new(); +//! builder.push_point(Some(&geo::point!(x: 1., y: 2.))); +//! let array = builder.finish(); +//! ``` #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![cfg_attr(not(test), deny(unused_crate_dependencies))] +#![deny(missing_docs)] // FIXME some modules allow missing docs pub use trait_::GeometryArrayTrait; diff --git a/src/scalar/mod.rs b/src/scalar/mod.rs index 0cf05221..2adf9745 100644 --- a/src/scalar/mod.rs +++ b/src/scalar/mod.rs @@ -1,5 +1,7 @@ //! GeoArrow scalars, which are references onto a full GeoArrow array at a specific index. +#![allow(missing_docs)] // FIXME + pub use binary::{OwnedWKB, WKB}; pub use coord::{Coord, InterleavedCoord, SeparatedCoord}; pub use geometry::{Geometry, OwnedGeometry}; diff --git a/src/table/mod.rs b/src/table.rs similarity index 64% rename from src/table/mod.rs rename to src/table.rs index 2f9bef1d..ec48f28f 100644 --- a/src/table/mod.rs +++ b/src/table.rs @@ -1,5 +1,6 @@ -//! Abstractions for Arrow tables. 
Useful for dataset IO where data will have geometries and -//! attributes. +//! Abstractions for Arrow tables. +//! +//! Useful for dataset IO where data will have geometries and attributes. use std::ops::Deref; use std::sync::Arc; @@ -41,6 +42,26 @@ pub struct Table { } impl Table { + /// Creates a new table from a schema and a vector of record batches. + /// + /// Returns an error if a record batch's schema fields do not match the + /// top-level schema's fields. + /// + /// # Examples + /// + /// ``` + /// use arrow_array::RecordBatch; + /// use arrow_schema::{Schema, SchemaRef}; + /// use geoarrow::{GeometryArrayTrait, array::PointArray, table::Table}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let field = array.extension_field(); + /// let schema: SchemaRef = Schema::new(vec![field]).into(); + /// let columns = vec![array.into_array_ref()]; + /// let batch = RecordBatch::try_new(schema.clone(), columns).unwrap(); + /// let table = Table::try_new(schema, vec![batch]).unwrap(); + /// ``` pub fn try_new(schema: SchemaRef, batches: Vec) -> Result { for batch in batches.iter() { // Don't check schema metadata in comparisons. @@ -60,6 +81,39 @@ impl Table { Ok(Self { schema, batches }) } + /// Creates a new table from a schema, a vector of record batches, and a chunked geometry array. + /// + /// Returns an error if a record batch's schema fields do not match the + /// top-level schema's fields, or if the batches are empty. 
+ /// + /// # Examples + /// + /// ``` + /// use arrow_array::{Int32Array, RecordBatch}; + /// use arrow_schema::{DataType, Schema, SchemaRef, Field}; + /// use geoarrow::{ + /// GeometryArrayTrait, + /// array::PointArray, + /// table::Table, + /// chunked_array::ChunkedGeometryArray + /// }; + /// use std::sync::Arc; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array]); + /// + /// let id_array = Int32Array::from(vec![1]); + /// let schema_ref = Arc::new(Schema::new(vec![ + /// Field::new("id", DataType::Int32, false) + /// ])); + /// let batch = RecordBatch::try_new( + /// schema_ref.clone(), + /// vec![Arc::new(id_array)] + /// ).unwrap(); + /// + /// let table = Table::from_arrow_and_geometry(schema_ref, vec![batch], Arc::new(chunked_array)).unwrap(); + /// ``` pub fn from_arrow_and_geometry( schema: SchemaRef, batches: Vec, @@ -83,7 +137,24 @@ impl Table { Self::try_new(new_schema, new_batches) } - /// Cast the geometry at `index` to a different data type + /// Casts the geometry at `index` to a different data type + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// use geoarrow::{array::CoordType, datatypes::{GeoDataType, Dimension}}; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let mut table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let index = table.default_geometry_column_idx().unwrap(); + /// + /// // Change to separated storage of coordinates + /// table.cast_geometry(index, &GeoDataType::LineString(CoordType::Separated, Dimension::XY)).unwrap(); + /// # } + /// ``` pub fn cast_geometry(&mut self, index: usize, to_type: &GeoDataType) -> Result<()> { let orig_field = self.schema().field(index); @@ -102,9 +173,13 @@ impl Table { Ok(()) } - /// Parse the geometry at `index` to a GeoArrow-native type + 
/// Parse the WKB geometry at `index` to a GeoArrow-native type. + /// + /// Use [Self::cast_geometry] if you know the target data type. /// - /// Use [Self::cast_geometry] if you know the target data type + /// # Examples + /// + /// TODO pub fn parse_geometry_to_native( &mut self, index: usize, @@ -182,6 +257,7 @@ impl Table { // Note: This function is relatively complex because we want to parse any WKB columns to // geoarrow-native arrays #[deprecated] + #[allow(missing_docs)] pub fn from_arrow( batches: Vec, schema: SchemaRef, @@ -292,26 +368,112 @@ impl Table { Table::try_new(new_schema, new_record_batches) } + /// Returns the length of this table. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// assert_eq!(table.len(), 21); + /// # } + /// ``` pub fn len(&self) -> usize { self.batches.iter().fold(0, |sum, val| sum + val.num_rows()) } + /// Returns true if this table is empty. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// assert!(!table.is_empty()); + /// # } + /// ``` pub fn is_empty(&self) -> bool { self.len() == 0 } + /// Consumes this table, returning its schema and its record batches. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let (schema, record_batches) = table.into_inner(); + /// # } + /// ``` pub fn into_inner(self) -> (SchemaRef, Vec) { (self.schema, self.batches) } + /// Returns a reference to this table's schema. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let schema = table.schema(); + /// # } + /// ``` pub fn schema(&self) -> &SchemaRef { &self.schema } + /// Returns an immutable slice of this table's record batches. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let record_batches = table.batches(); + /// # } + /// ``` pub fn batches(&self) -> &[RecordBatch] { &self.batches } + /// Returns this table's default geometry index. + /// + /// Returns an error if there is more than one geometry column. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// assert_eq!(table.default_geometry_column_idx().unwrap(), 6); + /// # } + /// ``` pub fn default_geometry_column_idx(&self) -> Result { let geom_col_indices = self.schema.as_ref().geometry_columns(); if geom_col_indices.len() != 1 { @@ -324,7 +486,23 @@ impl Table { } } - /// Access the geometry chunked array at the provided column index. + /// Returns a reference to the chunked geometry array at the given index. + /// + /// If index is `None` and there is only one geometry column, that array + /// will be returned. Otherwise, this method will return an error. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let chunked_array = table.geometry_column(None).unwrap(); // there's only one geometry column + /// # } + /// ``` pub fn geometry_column( &self, index: Option, @@ -351,10 +529,24 @@ impl Table { from_arrow_chunks(array_refs.as_slice(), field) } - /// Access all geometry chunked arrays from the table. + /// Returns a vector of references to all geometry chunked arrays. /// /// This may return an empty `Vec` if there are no geometry columns in the table, or may return /// more than one element if there are multiple geometry columns. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let chunked_arrays = table.geometry_columns().unwrap(); + /// assert_eq!(chunked_arrays.len(), 1); + /// # } + /// ``` pub fn geometry_columns(&self) -> Result>> { self.schema .as_ref() @@ -364,12 +556,43 @@ impl Table { .collect() } - /// The number of columns in this table. + /// Returns the number of columns in this table. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// assert_eq!(table.num_columns(), 7); + /// # } + /// ``` pub fn num_columns(&self) -> usize { self.schema.fields().len() } - /// Replace the column at index `i` with the given field and arrays. + /// Replaces the column at index `i` with the given field and arrays. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::{sync::Arc, fs::File}; + /// use arrow_schema::{DataType, Field}; + /// use arrow_array::Int32Array; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let mut table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let indices: Vec<_> = (0..table.len()).map(|n| i32::try_from(n).unwrap()).collect(); + /// let array = Int32Array::from(indices); + /// let field = Field::new("id", DataType::Int32, false); + /// table.set_column(0, field.into(), vec![Arc::new(array)]).unwrap(); + /// # } + /// ``` pub fn set_column( &mut self, i: usize, @@ -416,6 +639,26 @@ impl Table { ChunkedArray::new(removed_chunks) } + /// Appends a column to this table, returning its new index. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::{sync::Arc, fs::File}; + /// use arrow_schema::{DataType, Field}; + /// use arrow_array::Int32Array; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let mut table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let indices: Vec<_> = (0..table.len()).map(|n| i32::try_from(n).unwrap()).collect(); + /// let array = Int32Array::from(indices); + /// let field = Field::new("id", DataType::Int32, false); + /// let index = table.append_column(field.into(), vec![Arc::new(array)]).unwrap(); + /// assert_eq!(index, 7); + /// # } + /// ``` pub fn append_column(&mut self, field: FieldRef, column: Vec>) -> Result { assert_eq!(self.batches().len(), column.len()); diff --git a/src/trait_.rs b/src/trait_.rs index a2fba6e5..5a88b71c 100644 --- a/src/trait_.rs +++ b/src/trait_.rs @@ -1,4 +1,4 @@ -//! Defines [`GeometryArrayTrait`], which all geometry arrays implement. +//! Defines [`GeometryArrayTrait`], which all geometry arrays implement, and other traits. use crate::array::metadata::ArrayMetadata; use crate::array::{CoordBuffer, CoordType}; @@ -18,22 +18,12 @@ pub trait GeometryArrayTrait: std::fmt::Debug + Send + Sync { /// Returns the array as [`Any`] so that it can be /// downcasted to a specific implementation. 
/// - /// # Example: + /// # Examples /// /// ``` - /// //use geoarrow::datatypes::GeoDataType; - /// //use geoarrow::array::PointArray; - /// //use geoarrow::GeometryArrayTrait; - /// //use geo::point; - /// - /// //let point = point!(x: 1., y: 2.); - /// //let point_array: PointArray = vec![point].into(); - /// - /// //let geometry_array = Arc::new(point_array) as Arc; - /// - /// # use std::sync::Arc; - /// # use arrow_array::{Int32Array, RecordBatch}; - /// # use arrow_schema::{Schema, Field, DataType, ArrowError}; + /// use std::sync::Arc; + /// use arrow_array::{Int32Array, RecordBatch}; + /// use arrow_schema::{Schema, Field, DataType, ArrowError}; /// /// let id = Int32Array::from(vec![1, 2, 3, 4, 5]); /// let batch = RecordBatch::try_new( @@ -51,52 +41,144 @@ pub trait GeometryArrayTrait: std::fmt::Debug + Send + Sync { /// Returns a the [`GeoDataType`] of this array. /// - /// # Example: + /// # Examples /// /// ``` - /// use geoarrow::datatypes::GeoDataType; - /// use geoarrow::array::PointArray; - /// use geoarrow::GeometryArrayTrait; - /// use geo::point; + /// use geoarrow::{array::PointArray, datatypes::GeoDataType, GeometryArrayTrait}; /// - /// let point = point!(x: 1., y: 2.); + /// let point = geo::point!(x: 1., y: 2.); /// let point_array: PointArray<2> = vec![point].as_slice().into(); - /// /// assert!(matches!(point_array.data_type(), GeoDataType::Point(_, _))); /// ``` fn data_type(&self) -> GeoDataType; - /// Get the logical DataType of this array. + /// Returns the [DataType] of this array. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointArray, datatypes::GeoDataType, GeometryArrayTrait}; + /// use arrow_schema::DataType; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// assert!(matches!(point_array.storage_type(), DataType::FixedSizeList(_, _))); + /// ``` fn storage_type(&self) -> DataType; - /// Get the extension type of this array, as [defined by the GeoArrow + /// Returns the extension type of this array, as [defined by the GeoArrow /// specification](https://github.com/geoarrow/geoarrow/blob/main/extension-types.md). /// - /// Always returns `DataType::Extension`. + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointArray, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// let field = point_array.extension_field(); + /// assert_eq!(field.name(), "geometry"); + /// ``` fn extension_field(&self) -> Arc; - /// Get the extension name of this array. + /// Returns the extension name of this array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointArray, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// assert_eq!(point_array.extension_name(), "geoarrow.point"); + /// ``` fn extension_name(&self) -> &str; - /// Convert this array into an arced [`arrow`] array. - /// # Implementation + /// Converts this array into an arced [`arrow`] array, consuming the original array. + /// /// This is `O(1)`. 
+ /// + /// # Examples + /// + /// ``` + /// + /// use geoarrow::{array::PointArray, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// let array_ref = point_array.into_array_ref(); + /// ``` #[must_use] fn into_array_ref(self) -> ArrayRef; + /// Converts this array into an arced [`arrow`] array. + /// + /// This is `O(1)`. + /// + /// # Examples + /// + /// ``` + /// + /// use geoarrow::{array::PointArray, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// let array_ref = point_array.to_array_ref(); + /// ``` #[must_use] fn to_array_ref(&self) -> ArrayRef; - /// Get the coordinate type of this geometry array, either interleaved or separated. + /// Returns the [CoordType] of this geometry array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::{PointArray, CoordType}, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// assert_eq!(point_array.coord_type(), CoordType::Interleaved); + /// ``` fn coord_type(&self) -> CoordType; + /// Converts this array to the same type of array but with the provided [CoordType]. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::{PointArray, CoordType}, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// let point_array = point_array.to_coord_type(CoordType::Separated); + /// ``` #[must_use] fn to_coord_type(&self, coord_type: CoordType) -> Arc; /// The number of geometries contained in this array. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointArray, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// assert_eq!(point_array.len(), 1); + /// ``` fn len(&self) -> usize; /// Returns `true` if the array is empty. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointArray, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// assert!(!point_array.is_empty()); + /// ``` fn is_empty(&self) -> bool { self.len() == 0 } @@ -118,21 +200,69 @@ pub trait GeometryArrayTrait: std::fmt::Debug + Send + Sync { /// ``` fn nulls(&self) -> Option<&NullBuffer>; + /// Returns this array's metadata. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{GeometryArrayTrait, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let metadata = array.metadata(); + /// ``` fn metadata(&self) -> Arc; + /// Returns a geometry array reference that includes the provided metadata. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{GeometryArrayTrait, array::{PointArray, metadata::{ArrayMetadata, Edges}}}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let metadata = ArrayMetadata { + /// crs: None, + /// edges: Some(Edges::Spherical), + /// }; + /// let metadata = array.with_metadata(metadata.into()); + /// ``` #[must_use] fn with_metadata(&self, metadata: Arc) -> GeometryArrayRef; - /// The number of null slots in this array. - /// # Implementation + /// Returns the number of null slots in this array. + /// /// This is `O(1)` since the number of null elements is pre-computed. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{GeometryArrayTrait, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// assert_eq!(array.null_count(), 0); + /// ``` #[inline] fn null_count(&self) -> usize { self.nulls().map(|x| x.null_count()).unwrap_or(0) } /// Returns whether slot `i` is null. - /// # Panic + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{GeometryArrayTrait, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// assert!(!array.is_null(0)); + /// ``` + /// + /// # Panics + /// /// Panics iff `i >= self.len()`. #[inline] fn is_null(&self, i: usize) -> bool { @@ -140,13 +270,36 @@ pub trait GeometryArrayTrait: std::fmt::Debug + Send + Sync { } /// Returns whether slot `i` is valid. - /// # Panic + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{GeometryArrayTrait, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// assert!(array.is_valid(0)); + /// ``` + /// + /// # Panics + /// /// Panics iff `i >= self.len()`. #[inline] fn is_valid(&self, i: usize) -> bool { !self.is_null(i) } + /// Returns a reference to this array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{GeometryArrayTrait, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let array_ref = array.as_ref(); + /// ``` fn as_ref(&self) -> &dyn GeometryArrayTrait; // /// Clones this [`GeometryArray`] with a new new assigned bitmap. @@ -155,9 +308,10 @@ pub trait GeometryArrayTrait: std::fmt::Debug + Send + Sync { // fn with_validity(&self, validity: Option) -> Box; } +/// Type alias for a dynamic reference to something that implements [GeometryArrayTrait]. 
pub type GeometryArrayRef = Arc; -/// A generic trait for accessing the values of an [`Array`] +/// A trait for accessing the values of an [`Array`]. /// /// # Validity /// @@ -165,7 +319,7 @@ pub type GeometryArrayRef = Arc; /// within the bounds `0..Array::len`, including for null indexes where [`Array::is_null`] is true. /// /// The value at null indexes is unspecified, and implementations must not rely on a specific -/// value such as [`Default::default`] being returned, however, it must not be undefined +/// value such as [`Default::default`] being returned, however, it must not be undefined. pub trait GeometryArrayAccessor<'a>: GeometryArrayTrait { /// The [geoarrow scalar object][crate::scalar] for this geometry array type. type Item: Send + Sync + GeometryScalarTrait; @@ -173,20 +327,58 @@ pub trait GeometryArrayAccessor<'a>: GeometryArrayTrait { /// The [`geo`] scalar object for this geometry array type. type ItemGeo: From; - /// Returns the element at index `i` + /// Returns the element at index `i`, not considering validity. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray, geo_traits::PointTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let value = array.value(0); // geoarrow::scalar::Point<2> + /// assert_eq!(value.x(), 1.); + /// assert_eq!(value.y(), 2.); + /// ``` + /// /// # Panics - /// Panics if the value is outside the bounds of the array + /// + /// Panics if the value is outside the bounds of the array. fn value(&'a self, index: usize) -> Self::Item { assert!(index <= self.len()); unsafe { self.value_unchecked(index) } } - /// Returns the element at index `i` + /// Returns the element at index `i`, not considering validity. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// unsafe { + /// let value = array.value_unchecked(0); // geoarrow::scalar::Point<2> + /// } + /// ``` + /// /// # Safety + /// /// Caller is responsible for ensuring that the index is within the bounds of the array unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item; - /// Access the value at slot `i` as an Arrow scalar, considering validity. + /// Returns the value at slot `i` as an Arrow scalar, considering validity. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// assert!(array.get(0).is_some()); + /// ``` fn get(&'a self, index: usize) -> Option { if self.is_null(index) { return None; @@ -195,7 +387,19 @@ pub trait GeometryArrayAccessor<'a>: GeometryArrayTrait { Some(self.value(index)) } - /// Access the value at slot `i` as an Arrow scalar, considering validity. + /// Returns the value at slot `i` as an Arrow scalar, considering validity. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// unsafe { + /// assert!(array.get_unchecked(0).is_some()); + /// } + /// ``` /// /// # Safety /// @@ -208,12 +412,34 @@ pub trait GeometryArrayAccessor<'a>: GeometryArrayTrait { Some(unsafe { self.value_unchecked(index) }) } - /// Access the value at slot `i` as a [`geo`] scalar, not considering validity. + /// Returns the value at slot `i` as a [`geo`] scalar, not considering validity. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let value = array.value_as_geo(0); // geo::Point + /// assert_eq!(value.x(), 1.); + /// assert_eq!(value.y(), 2.); + /// ``` fn value_as_geo(&'a self, i: usize) -> Self::ItemGeo { self.value(i).into() } - /// Access the value at slot `i` as a [`geo`] scalar, considering validity. + /// Returns the value at slot `i` as a [`geo`] scalar, considering validity. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// assert!(array.get_as_geo(0).is_some()); + /// ``` fn get_as_geo(&'a self, i: usize) -> Option { if self.is_null(i) { return None; @@ -222,92 +448,316 @@ pub trait GeometryArrayAccessor<'a>: GeometryArrayTrait { Some(self.value_as_geo(i)) } + /// Iterates over this array's geoarrow scalar values, considering validity. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let maybe_points: Vec> = array.iter().collect(); + /// ``` fn iter(&'a self) -> impl ExactSizeIterator> + 'a { (0..self.len()).map(|i| unsafe { self.get_unchecked(i) }) } - /// Iterator over geoarrow scalar values, not looking at validity + /// Iterator over geoarrow scalar values, not considering validity. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let points: Vec<_> = array.iter_values().collect(); + /// ``` fn iter_values(&'a self) -> impl ExactSizeIterator + 'a { (0..self.len()).map(|i| unsafe { self.value_unchecked(i) }) } - /// Iterator over geo scalar values, taking into account validity + /// Iterator over geo scalar values, considering validity + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let maybe_points: Vec> = array.iter_geo().collect(); + /// ``` fn iter_geo(&'a self) -> impl ExactSizeIterator> + 'a { (0..self.len()).map(|i| unsafe { self.get_unchecked(i) }.map(|x| x.into())) } /// Iterator over geo scalar values, not looking at validity + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let points: Vec<_> = array.iter_geo_values().collect(); + /// ``` fn iter_geo_values(&'a self) -> impl ExactSizeIterator + 'a { (0..self.len()).map(|i| unsafe { self.value_unchecked(i) }.into()) } } -/// Horrible name, to be changed to a better name in the future!! +/// Trait for geometry array methods that return `Self`. +/// +/// TODO Horrible name, to be changed to a better name in the future!! pub trait GeometryArraySelfMethods { - /// Create a new array with replaced coordinates + /// Creates a new array with replaced coordinates. 
/// /// This is useful if you want to apply an operation to _every_ coordinate in unison, such as a /// reprojection or a scaling operation, with no regards to each individual geometry + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// array::{PointArray, CoordBuffer, InterleavedCoordBuffer}, + /// trait_::{GeometryArraySelfMethods, GeometryArrayAccessor}, + /// }; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let coords = CoordBuffer::Interleaved(InterleavedCoordBuffer::new(vec![3., 4.].into())); + /// let array = array.with_coords(coords); + /// let value = array.value_as_geo(0); + /// assert_eq!(value.x(), 3.); + /// assert_eq!(value.y(), 4.); + /// ``` fn with_coords(self, coords: CoordBuffer) -> Self; - /// Cast the coordinate buffer of this geometry array to the given coordinate type. + /// Casts the coordinate buffer of this geometry array to the given coordinate type. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// array::{PointArray, CoordType, CoordBuffer}, + /// trait_::{GeometryArrayAccessor, GeometryArraySelfMethods}, + /// }; + /// + /// let point_0 = geo::point!(x: 1., y: 2.); + /// let point_1 = geo::point!(x: 3., y: 4.); + /// let array_interleaved: PointArray<2> = vec![point_0, point_1].as_slice().into(); + /// let array_separated = array_interleaved.into_coord_type(CoordType::Separated); + /// assert!(matches!(array_separated.coords(), &CoordBuffer::Separated(_))); + /// ``` fn into_coord_type(self, coord_type: CoordType) -> Self; /// Returns a zero-copy slice of this array with the indicated offset and length.
/// - /// # Panic + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// array::PointArray, + /// trait_::{GeometryArraySelfMethods, GeometryArrayAccessor, GeometryArrayTrait} + /// }; + /// + /// let point_0 = geo::point!(x: 1., y: 2.); + /// let point_1 = geo::point!(x: 3., y: 4.); + /// let array: PointArray<2> = vec![point_0, point_1].as_slice().into(); + /// let smaller_array = array.slice(1, 1); + /// assert_eq!(smaller_array.len(), 1); + /// let value = smaller_array.value_as_geo(0); + /// assert_eq!(value.x(), 3.); + /// assert_eq!(value.y(), 4.); + /// ``` + /// + /// # Panics + /// /// This function panics iff `offset + length > self.len()`. #[must_use] fn slice(&self, offset: usize, length: usize) -> Self; - /// A slice that fully copies the contents of the underlying buffer + /// Returns an owned slice that fully copies the contents of the underlying buffer. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointArray, trait_::GeometryArraySelfMethods}; + /// + /// let point_0 = geo::point!(x: 1., y: 2.); + /// let point_1 = geo::point!(x: 3., y: 4.); + /// let array: PointArray<2> = vec![point_0, point_1].as_slice().into(); + /// let smaller_array = array.owned_slice(1, 1); + /// ``` #[must_use] fn owned_slice(&self, offset: usize, length: usize) -> Self; } +/// Convert GeoArrow arrays into their underlying arrow arrays. pub trait IntoArrow { + /// The type of arrow array that this geoarrow array can be converted into. type ArrowArray; + /// Converts this geoarrow array into an arrow array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::IntoArrow, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let arrow_array = array.into_arrow(); + /// ``` fn into_arrow(self) -> Self::ArrowArray; } +/// A trait for converting geoarrow scalar types to their [mod@geo] equivalent.
pub trait GeometryScalarTrait { /// The [`geo`] scalar object for this geometry array type. type ScalarGeo; + /// Converts this value to its [mod@geo] equivalent. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::{GeometryScalarTrait, GeometryArrayAccessor}, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let point = array.value(0).to_geo(); // array.value_as_geo(0) does the same thing + /// assert_eq!(point.x(), 1.); + /// assert_eq!(point.y(), 2.); + /// ``` fn to_geo(&self) -> Self::ScalarGeo; + /// Converts this value to a [geo::Geometry]. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::{GeometryScalarTrait, GeometryArrayAccessor}, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let geometry = array.value(0).to_geo_geometry(); + /// ``` fn to_geo_geometry(&self) -> geo::Geometry; + /// Converts this value to a [geos::Geometry]. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::{GeometryScalarTrait, GeometryArrayAccessor}, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let geometry = array.value(0).to_geos().unwrap(); + /// ``` #[cfg(feature = "geos")] fn to_geos(&self) -> std::result::Result; } /// A trait describing a mutable geometry array; i.e. an array whose values can be changed. +/// /// Mutable arrays cannot be cloned but can be mutated in place, /// thereby making them useful to perform numeric operations without allocations. /// As in [`GeometryArrayTrait`], concrete arrays (such as /// [`PointBuilder`][crate::array::PointBuilder]) implement how they are mutated. pub trait GeometryArrayBuilder: std::fmt::Debug + Send + Sync + Sized { - /// The length of the array. + /// Returns the length of the array. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointBuilder, trait_::GeometryArrayBuilder}; + /// + /// let mut builder = PointBuilder::new(); + /// assert_eq!(builder.len(), 0); + /// builder.push_point(Some(&geo::point!(x: 1., y: 2.))); + /// assert_eq!(builder.len(), 1); + /// ``` fn len(&self) -> usize; - /// Whether the array is empty. + /// Returns whether the array is empty. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointBuilder, trait_::GeometryArrayBuilder}; + /// + /// let mut builder = PointBuilder::new(); + /// assert!(builder.is_empty()); + /// builder.push_point(Some(&geo::point!(x: 1., y: 2.))); + /// assert!(!builder.is_empty()); + /// ``` fn is_empty(&self) -> bool { self.len() == 0 } - /// The optional validity of the array. + /// Returns the validity buffer of this array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointBuilder, trait_::GeometryArrayBuilder}; + /// + /// let builder = PointBuilder::<2>::new(); + /// assert!(builder.nulls().is_empty()); + /// ``` fn nulls(&self) -> &NullBufferBuilder; + /// Creates a new builder. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointBuilder, trait_::GeometryArrayBuilder}; + /// let builder = PointBuilder::<2>::new(); + /// ``` fn new() -> Self; + /// Creates a new builder with capacity and other options. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// array::{PointBuilder, CoordType, metadata::{ArrayMetadata, Edges}}, + /// trait_::GeometryArrayBuilder, + /// }; + /// let metadata = ArrayMetadata { + /// crs: None, + /// edges: Some(Edges::Spherical), + /// }; + /// let builder = PointBuilder::<2>::with_geom_capacity_and_options( + /// 2, + /// CoordType::Interleaved, + /// metadata.into() + /// ); + /// ``` fn with_geom_capacity_and_options( geom_capacity: usize, coord_type: CoordType, metadata: Arc, ) -> Self; + /// Creates a new builder with the given capacity. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// array::PointBuilder, + /// trait_::GeometryArrayBuilder, + /// }; + /// let builder = PointBuilder::<2>::with_geom_capacity(2); + /// ``` fn with_geom_capacity(geom_capacity: usize) -> Self { GeometryArrayBuilder::with_geom_capacity_and_options( geom_capacity, @@ -316,13 +766,58 @@ pub trait GeometryArrayBuilder: std::fmt::Debug + Send + Sync + Sized { ) } + /// Sets this builder's metadata. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// array::{PointBuilder, metadata::{ArrayMetadata, Edges}}, + /// trait_::GeometryArrayBuilder, + /// }; + /// let mut builder = PointBuilder::<2>::new(); + /// let metadata = ArrayMetadata { + /// crs: None, + /// edges: Some(Edges::Spherical), + /// }; + /// builder.set_metadata(metadata.into()); + /// ``` fn set_metadata(&mut self, metadata: Arc); + /// Finishes building the underlying data structures and returns a geometry array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointBuilder, trait_::{GeometryArrayBuilder, GeometryArrayTrait}}; + /// + /// let mut builder = PointBuilder::new(); + /// builder.push_point(Some(&geo::point!(x: 1., y: 2.))); + /// let array = builder.finish(); + /// assert_eq!(array.len(), 1); + /// ``` fn finish(self) -> Arc; - /// Get the coordinate type of this geometry array, either interleaved or separated. + /// Returns the [CoordType] of this geometry array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::{PointBuilder, CoordType}, trait_::GeometryArrayBuilder}; + /// let builder = PointBuilder::<2>::new(); + /// assert_eq!(builder.coord_type(), CoordType::Interleaved); + /// ``` fn coord_type(&self) -> CoordType; + /// Returns the [ArrayMetadata] of this geometry array.
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::{PointBuilder, CoordType}, trait_::GeometryArrayBuilder}; + /// let builder = PointBuilder::<2>::new(); + /// let metadata = builder.metadata(); + /// ``` fn metadata(&self) -> Arc; // /// Convert itself to an (immutable) [`GeometryArray`]. @@ -356,5 +851,14 @@ pub trait GeometryArrayBuilder: std::fmt::Debug + Send + Sync + Sized { // /// Shrink the array to fit its length. // fn shrink_to_fit(&mut self); + /// Converts this builder into a dynamic array reference. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointBuilder, trait_::GeometryArrayBuilder}; + /// let builder = PointBuilder::<2>::new(); + /// let array_ref = builder.into_array_ref(); + /// ``` fn into_array_ref(self) -> Arc; } From 01d568f37ade767f055e8f775a010de4c0d81fa0 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 13 Aug 2024 13:40:37 -0600 Subject: [PATCH 04/15] fix(docs): use original badge style Co-authored-by: Kyle Barron --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 04c3f184..d420b307 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # geoarrow-rs -[![GitHub Workflow Status (CI)](https://img.shields.io/github/actions/workflow/status/geoarrow/geoarrow-rs/ci.yml?branch=main&style=for-the-badge)](https://github.com/geoarrow/geoarrow-rs/actions/workflows/ci.yml) -[![docs.rs](https://img.shields.io/docsrs/geoarrow?style=for-the-badge&label=docs.rs)](https://docs.rs/geoarrow/latest/geoarrow/) -[![Crates.io](https://img.shields.io/crates/v/geoarrow?style=for-the-badge)](https://crates.io/crates/geoarrow) -![Crates.io](https://img.shields.io/crates/l/geoarrow?style=for-the-badge) +[![GitHub Workflow Status (CI)](https://img.shields.io/github/actions/workflow/status/geoarrow/geoarrow-rs/ci.yml?branch=main)](https://github.com/geoarrow/geoarrow-rs/actions/workflows/ci.yml) 
+[![docs.rs](https://img.shields.io/docsrs/geoarrow?label=docs.rs)](https://docs.rs/geoarrow/latest/geoarrow/) +[![Crates.io](https://img.shields.io/crates/v/geoarrow)](https://crates.io/crates/geoarrow) +![Crates.io](https://img.shields.io/crates/l/geoarrow) A Rust implementation of the [GeoArrow](https://github.com/geoarrow/geoarrow) specification and bindings to [GeoRust algorithms](https://github.com/georust/geo) for efficient spatial operations on GeoArrow memory. From a33f9a238b0983263165ef685a952d374ceb74f5 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 13 Aug 2024 13:40:50 -0600 Subject: [PATCH 05/15] fix(docs): use original badge style Co-authored-by: Kyle Barron --- js/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/README.md b/js/README.md index 9644e32f..159f8e8a 100644 --- a/js/README.md +++ b/js/README.md @@ -1,6 +1,6 @@ # geoarrow-wasm -[![GitHub Workflow Status (WASM)](https://img.shields.io/github/actions/workflow/status/geoarrow/geoarrow-rs/wasm.yml?label=WASM&branch=main&style=for-the-badge)](https://github.com/geoarrow/geoarrow-rs/actions/workflows/wasm.yml) +[![GitHub Workflow Status (WASM)](https://img.shields.io/github/actions/workflow/status/geoarrow/geoarrow-rs/wasm.yml?label=WASM&branch=main)](https://github.com/geoarrow/geoarrow-rs/actions/workflows/wasm.yml) Efficient, vectorized geospatial operations in WebAssembly. 
From 247ab92460034acb89c70e58c4a088b5c6625053 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 13 Aug 2024 13:40:59 -0600 Subject: [PATCH 06/15] fix(docs): use original badge style Co-authored-by: Kyle Barron --- python/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/README.md b/python/README.md index 509650de..9eacb6f5 100644 --- a/python/README.md +++ b/python/README.md @@ -1,6 +1,6 @@ # geoarrow.rust -[![GitHub Workflow Status (Python)](https://img.shields.io/github/actions/workflow/status/geoarrow/geoarrow-rs/python.yml?branch=main&style=for-the-badge)](https://github.com/geoarrow/geoarrow-rs/actions/workflows/python.yml) +[![GitHub Workflow Status (Python)](https://img.shields.io/github/actions/workflow/status/geoarrow/geoarrow-rs/python.yml?branch=main)](https://github.com/geoarrow/geoarrow-rs/actions/workflows/python.yml) This folder contains Python bindings to the [GeoArrow Rust implementation](https://github.com/geoarrow/geoarrow-rs). From fdae83fd906d6f2d4fd17e6acd8cf84dddae998e Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 13 Aug 2024 13:42:16 -0600 Subject: [PATCH 07/15] fix(docs): add more info to edges docs --- src/array/metadata.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/array/metadata.rs b/src/array/metadata.rs index 44ffde63..56cb3764 100644 --- a/src/array/metadata.rs +++ b/src/array/metadata.rs @@ -13,6 +13,10 @@ use crate::error::GeoArrowError; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub enum Edges { /// Follow a spherical path rather than a planar. + /// + /// See [the geoarrow + /// specification](https://github.com/geoarrow/geoarrow/blob/main/extension-types.md#extension-metadata) + /// for more information about how `edges` should be used.
#[serde(rename = "spherical")] Spherical, } From 45fe1c67f4f4f3445c32eb12e82a19de9774285a Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 13 Aug 2024 13:43:22 -0600 Subject: [PATCH 08/15] fix(docs): remove extra whitespace Co-authored-by: Kyle Barron --- src/datatypes.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/datatypes.rs b/src/datatypes.rs index 3f2701c0..425e1bf1 100644 --- a/src/datatypes.rs +++ b/src/datatypes.rs @@ -34,7 +34,6 @@ impl Dimension { /// /// # Examples /// - /// /// ``` /// use geoarrow::datatypes::Dimension; /// From 9e3172ceb1e2dbf2bb8f1a99b8882ddc071619ef Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 13 Aug 2024 13:58:29 -0600 Subject: [PATCH 09/15] fix(docs): update wording Co-authored-by: Kyle Barron --- src/trait_.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trait_.rs b/src/trait_.rs index 5a88b71c..448c239e 100644 --- a/src/trait_.rs +++ b/src/trait_.rs @@ -52,7 +52,7 @@ pub trait GeometryArrayTrait: std::fmt::Debug + Send + Sync { /// ``` fn data_type(&self) -> GeoDataType; - /// Returns the [DataType] of this array. + /// Returns the physical [DataType] of this array. /// /// # Examples /// From 7c3b58a020bafc442e2494b1c5a5c63250bff0b6 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 13 Aug 2024 13:59:11 -0600 Subject: [PATCH 10/15] fix(docs): toml example Co-authored-by: Kyle Barron --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 7978d7fb..c5c439aa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,7 @@ //! For example, to convert between [geosjon](https://geojson.org/) and GeoArrow, enable the `geozero` feature in your `Cargo.toml`: //! //! ```toml -//! #[dependencies] +//! [dependencies] //! geoarrow = { version = "*", features = ["geozero"] } //! ``` //! 
From 89409cd8d5bc30b012f1b5c5bcb278cbfa96349b Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 13 Aug 2024 13:59:31 -0600 Subject: [PATCH 11/15] fix(docs): link to `ArrayRef` Co-authored-by: Kyle Barron --- src/trait_.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trait_.rs b/src/trait_.rs index 448c239e..34c7a0b8 100644 --- a/src/trait_.rs +++ b/src/trait_.rs @@ -851,7 +851,7 @@ pub trait GeometryArrayBuilder: std::fmt::Debug + Send + Sync + Sized { // /// Shrink the array to fit its length. // fn shrink_to_fit(&mut self); - /// Converts this builder into a dynamic array reference. + /// Converts this builder into an [`ArrayRef`], a dynamic array reference. /// /// # Examples /// From c6c1167b08fc782ba589e4b61dfff153abffc44f Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 13 Aug 2024 14:11:33 -0600 Subject: [PATCH 12/15] fix(docs): add `# Errors` section to some docstrings --- src/table.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/table.rs b/src/table.rs index ec48f28f..f10df235 100644 --- a/src/table.rs +++ b/src/table.rs @@ -44,6 +44,8 @@ pub struct Table { impl Table { /// Creates a new table from a schema and a vector of record batches. /// + /// # Errors + /// /// Returns an error if a record batch's schema fields do not match the /// top-level schema's fields. /// @@ -83,6 +85,8 @@ impl Table { /// Creates a new table from a schema, a vector of record batches, and a chunked geometry array. /// + /// # Errors + /// /// Returns an error if a record batch's schema fields do not match the /// top-level schema's fields, or if the batches are empty. /// @@ -460,6 +464,8 @@ impl Table { /// Returns this table's default geometry index. /// + /// # Errors + /// /// Returns an error if there is more than one geometry column. 
/// /// # Examples From fb3946cbb15e9c2d043195b576753734b74acb3d Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 13 Aug 2024 14:12:27 -0600 Subject: [PATCH 13/15] fix(docs): spelling Co-authored-by: Kyle Barron --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index c5c439aa..8b5b03a2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,7 @@ //! //! The [io] module has functions for reading and writing GeoArrow data from a variety of formats. //! To use most format readers and writers, you must enable their corresponding feature. -//! For example, to convert between [geosjon](https://geojson.org/) and GeoArrow, enable the `geozero` feature in your `Cargo.toml`: +//! For example, to convert between [geojson](https://geojson.org/) and GeoArrow, enable the `geozero` feature in your `Cargo.toml`: //! //! ```toml //! [dependencies] From e8a0374583e8a95fe9ec169fddc3825068dc5995 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 13 Aug 2024 14:15:12 -0600 Subject: [PATCH 14/15] docs: add more notes about geo traits --- src/lib.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 8b5b03a2..7635930d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,7 +34,9 @@ //! //! # Constructing //! -//! You can build GeoArrow arrays all at once from [mod@geo] structures: +//! You can build GeoArrow arrays all at once from [mod@geo] structures, or anything that implements geometry traits, e.g. [PointTrait](crate::geo_traits::PointTrait). +//! Along with the GeoRust community, **geoarrow-rs** has been prototyping geometry access traits for a standardized way to access coordinate information, regardless of the storage format of the geometries. +//! For now, we vendor an implementation of geo-traits (see [mod@geo_traits]), but this may be upstreamed to georust in the future. //! //! ``` //! 
use geoarrow::array::PointArray; From 5cd0e76156951aeeb67ddc23ad9618017f433c51 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 13 Aug 2024 15:17:52 -0600 Subject: [PATCH 15/15] refactor!: consolidate on (batches, schema) --- js/src/io/flatgeobuf.rs | 2 +- js/src/io/geojson.rs | 4 ++-- js/src/io/parquet/async.rs | 8 ++++---- js/src/io/parquet/sync.rs | 4 ++-- python/core/Cargo.lock | 5 ----- python/core/src/ffi/from_python/table.rs | 2 +- python/core/src/interop/util.rs | 4 ++-- python/core/src/io/parquet/reader.rs | 8 ++++---- src/algorithm/native/explode.rs | 2 +- src/io/gdal/reader.rs | 2 +- src/io/geozero/table/builder/table.rs | 2 +- src/io/ipc/reader.rs | 4 ++-- src/io/parquet/reader/async.rs | 2 +- src/io/parquet/reader/builder.rs | 2 +- src/io/stream.rs | 2 +- src/table.rs | 24 ++++++++++++------------ src/test/point.rs | 2 +- 17 files changed, 37 insertions(+), 42 deletions(-) diff --git a/js/src/io/flatgeobuf.rs b/js/src/io/flatgeobuf.rs index 1ae43167..50c1f2d4 100644 --- a/js/src/io/flatgeobuf.rs +++ b/js/src/io/flatgeobuf.rs @@ -33,6 +33,6 @@ pub fn read_flatgeobuf(file: &[u8], batch_size: Option) -> WasmResult) -> WasmResult // assert_parquet_file_not_empty(parquet_file)?; let mut cursor = Cursor::new(file); let geo_table = _read_geojson(&mut cursor, batch_size)?; - let (schema, batches) = geo_table.into_inner(); + let (batches, schema) = geo_table.into_inner(); Ok(Table::new(schema, batches)) } @@ -39,7 +39,7 @@ pub fn read_geojson(file: &[u8], batch_size: Option) -> WasmResult
#[wasm_bindgen(js_name = writeGeoJSON)] pub fn write_geojson(table: Table) -> WasmResult> { let (schema, batches) = table.into_inner(); - let rust_table = geoarrow::table::Table::try_new(schema, batches)?; + let rust_table = geoarrow::table::Table::try_new(batches, schema)?; let mut output_file: Vec = vec![]; _write_geojson(rust_table, &mut output_file)?; Ok(output_file) diff --git a/js/src/io/parquet/async.rs b/js/src/io/parquet/async.rs index 2651ce44..e9d6e82e 100644 --- a/js/src/io/parquet/async.rs +++ b/js/src/io/parquet/async.rs @@ -135,7 +135,7 @@ impl ParquetFile { ) .build()?; let table = stream.read_table().await?; - let (schema, batches) = table.into_inner(); + let (batches, schema) = table.into_inner(); Ok(Table::new(schema, batches)) } #[wasm_bindgen] @@ -267,11 +267,11 @@ impl ParquetDataset { let mut all_batches = vec![]; tables.into_iter().for_each(|table| { - let (_schema, table_batches) = table.into_inner(); + let (table_batches, _schema) = table.into_inner(); all_batches.extend(table_batches); }); - let table = geoarrow::table::Table::try_new(output_schema, all_batches)?; - let (schema, batches) = table.into_inner(); + let table = geoarrow::table::Table::try_new(all_batches, output_schema)?; + let (batches, schema) = table.into_inner(); Ok(Table::new(schema, batches)) } diff --git a/js/src/io/parquet/sync.rs b/js/src/io/parquet/sync.rs index ffeb0e5d..e3c7238c 100644 --- a/js/src/io/parquet/sync.rs +++ b/js/src/io/parquet/sync.rs @@ -37,7 +37,7 @@ pub fn read_geoparquet(file: Vec) -> WasmResult
{ )? .build()?; let geo_table = reader.read_table()?; - let (schema, batches) = geo_table.into_inner(); + let (batches, schema) = geo_table.into_inner(); Ok(Table::new(schema, batches)) } @@ -47,7 +47,7 @@ pub fn read_geoparquet(file: Vec) -> WasmResult
{ #[wasm_bindgen(js_name = writeGeoParquet)] pub fn write_geoparquet(table: Table) -> WasmResult> { let (schema, batches) = table.into_inner(); - let mut rust_table = geoarrow::table::Table::try_new(schema, batches)?; + let mut rust_table = geoarrow::table::Table::try_new(batches, schema)?; let mut output_file: Vec = vec![]; _write_geoparquet(&mut rust_table, &mut output_file, &Default::default())?; Ok(output_file) diff --git a/python/core/Cargo.lock b/python/core/Cargo.lock index 4c8e1f7c..71d12f64 100644 --- a/python/core/Cargo.lock +++ b/python/core/Cargo.lock @@ -1024,18 +1024,15 @@ dependencies = [ name = "geoarrow" version = "0.3.0-alpha.1" dependencies = [ - "anyhow", "arrow", "arrow-array", "arrow-buffer", "arrow-cast", "arrow-data", "arrow-ipc", - "arrow-json", "arrow-schema", "async-stream", "async-trait", - "bumpalo", "byteorder", "bytes", "chrono", @@ -1043,12 +1040,10 @@ dependencies = [ "futures", "geo", "geo-index", - "geojson", "geozero", "half", "http-range-client", "indexmap", - "itertools 0.13.0", "lexical-core", "num_enum", "object_store", diff --git a/python/core/src/ffi/from_python/table.rs b/python/core/src/ffi/from_python/table.rs index 9e0abd73..94a85a51 100644 --- a/python/core/src/ffi/from_python/table.rs +++ b/python/core/src/ffi/from_python/table.rs @@ -20,7 +20,7 @@ impl<'a> FromPyObject<'a> for GeoTable { batches.push(batch); } - let table = geoarrow::table::Table::try_new(schema, batches) + let table = geoarrow::table::Table::try_new(batches, schema) .map_err(|e| PyValueError::new_err(e.to_string()))?; let table = table .downcast(true) diff --git a/python/core/src/interop/util.rs b/python/core/src/interop/util.rs index 01c92f68..9449d92a 100644 --- a/python/core/src/interop/util.rs +++ b/python/core/src/interop/util.rs @@ -50,11 +50,11 @@ pub(crate) fn import_pyogrio(py: Python) -> PyGeoArrowResult> { } pub(crate) fn table_to_pytable(table: geoarrow::table::Table) -> PyTable { - let (schema, batches) = table.into_inner(); + let 
(batches, schema) = table.into_inner(); PyTable::new(batches, schema) } pub(crate) fn pytable_to_table(table: PyTable) -> Result { let (batches, schema) = table.into_inner(); - geoarrow::table::Table::try_new(schema, batches) + geoarrow::table::Table::try_new(batches, schema) } diff --git a/python/core/src/io/parquet/reader.rs b/python/core/src/io/parquet/reader.rs index 972641d0..3d7b73c0 100644 --- a/python/core/src/io/parquet/reader.rs +++ b/python/core/src/io/parquet/reader.rs @@ -574,10 +574,10 @@ impl ParquetDataset { let mut all_batches = vec![]; tables.into_iter().for_each(|table| { - let (_schema, table_batches) = table.into_inner(); + let (table_batches, _schema) = table.into_inner(); all_batches.extend(table_batches); }); - let table = Table::try_new(output_schema, all_batches) + let table = Table::try_new(all_batches, output_schema) .map_err(PyGeoArrowError::GeoArrowError)?; Ok(table_to_pytable(table)) })?; @@ -609,10 +609,10 @@ impl ParquetDataset { let mut all_batches = vec![]; tables.into_iter().for_each(|table| { - let (_schema, table_batches) = table.into_inner(); + let (table_batches, _schema) = table.into_inner(); all_batches.extend(table_batches); }); - let table = Table::try_new(output_schema, all_batches) + let table = Table::try_new(all_batches, output_schema) .map_err(PyGeoArrowError::GeoArrowError)?; Ok(table_to_pytable(table).to_arro3(py)?) 
}) diff --git a/src/algorithm/native/explode.rs b/src/algorithm/native/explode.rs index c7a03a9c..1b43fa86 100644 --- a/src/algorithm/native/explode.rs +++ b/src/algorithm/native/explode.rs @@ -286,7 +286,7 @@ impl ExplodeTable for Table { schema_builder.push(field.clone()); let schema = schema_builder.finish(); - Table::try_new(schema.into(), new_batches) + Table::try_new(new_batches, schema.into()) } else { // No take is necessary; nothing happens Ok(self.clone()) diff --git a/src/io/gdal/reader.rs b/src/io/gdal/reader.rs index 1c14234c..fb6df890 100644 --- a/src/io/gdal/reader.rs +++ b/src/io/gdal/reader.rs @@ -40,7 +40,7 @@ pub fn read_gdal(layer: &mut Layer, batch_size: Option) -> Result
.into_iter() .collect::, ArrowError>>()?; - Table::try_new(schema, batches) + Table::try_new(batches, schema) } #[cfg(test)] diff --git a/src/io/geozero/table/builder/table.rs b/src/io/geozero/table/builder/table.rs index ce64e699..78b1e998 100644 --- a/src/io/geozero/table/builder/table.rs +++ b/src/io/geozero/table/builder/table.rs @@ -204,7 +204,7 @@ impl GeoTableBuilder { let batches = self.batches; let schema = batches[0].schema(); - let mut table = Table::try_new(schema, batches)?; + let mut table = Table::try_new(batches, schema)?; let geom_slices = self .geom_arrays diff --git a/src/io/ipc/reader.rs b/src/io/ipc/reader.rs index 11ed2e96..d05e236a 100644 --- a/src/io/ipc/reader.rs +++ b/src/io/ipc/reader.rs @@ -11,7 +11,7 @@ pub fn read_ipc(reader: R) -> Result
{ let reader = FileReader::try_new(reader, None)?; let schema = reader.schema(); let batches = reader.collect::, ArrowError>>()?; - Table::try_new(schema, batches) + Table::try_new(batches, schema) } /// Read into a Table from Arrow IPC record batch stream. @@ -19,5 +19,5 @@ pub fn read_ipc_stream(reader: R) -> Result
{ let reader = StreamReader::try_new(reader, None)?; let schema = reader.schema(); let batches = reader.collect::, ArrowError>>()?; - Table::try_new(schema, batches) + Table::try_new(batches, schema) } diff --git a/src/io/parquet/reader/async.rs b/src/io/parquet/reader/async.rs index 05e59ad7..170e1497 100644 --- a/src/io/parquet/reader/async.rs +++ b/src/io/parquet/reader/async.rs @@ -152,7 +152,7 @@ impl GeoParquetRecordBatchStream pub async fn read_table(self) -> Result
{ let output_schema = self.output_schema.clone(); let batches = self.read_stream().try_collect::<_>().await?; - Table::try_new(output_schema, batches) + Table::try_new(batches, output_schema) } } diff --git a/src/io/parquet/reader/builder.rs b/src/io/parquet/reader/builder.rs index 5e6b33e5..5d02d453 100644 --- a/src/io/parquet/reader/builder.rs +++ b/src/io/parquet/reader/builder.rs @@ -127,7 +127,7 @@ impl GeoParquetRecordBatchReader { pub fn read_table(self) -> Result
{ let output_schema = self.output_schema.clone(); let batches = self.collect::, ArrowError>>()?; - Table::try_new(output_schema, batches) + Table::try_new(batches, output_schema) } } diff --git a/src/io/stream.rs b/src/io/stream.rs index 069daa1f..19534e91 100644 --- a/src/io/stream.rs +++ b/src/io/stream.rs @@ -27,7 +27,7 @@ impl RecordBatchReader { impl From
for RecordBatchReader { fn from(value: Table) -> Self { - let (schema, batches) = value.into_inner(); + let (batches, schema) = value.into_inner(); Self(Some(Box::new(RecordBatchIterator::new( batches.into_iter().map(Ok), schema, diff --git a/src/table.rs b/src/table.rs index f10df235..108d07ff 100644 --- a/src/table.rs +++ b/src/table.rs @@ -62,9 +62,9 @@ impl Table { /// let schema: SchemaRef = Schema::new(vec![field]).into(); /// let columns = vec![array.into_array_ref()]; /// let batch = RecordBatch::try_new(schema.clone(), columns).unwrap(); - /// let table = Table::try_new(schema, vec![batch]).unwrap(); + /// let table = Table::try_new(vec![batch], schema).unwrap(); /// ``` - pub fn try_new(schema: SchemaRef, batches: Vec) -> Result { + pub fn try_new(batches: Vec, schema: SchemaRef) -> Result { for batch in batches.iter() { // Don't check schema metadata in comparisons. // TODO: I have some issues in the Parquet reader where the batches are missing the @@ -80,7 +80,7 @@ impl Table { } } - Ok(Self { schema, batches }) + Ok(Self { batches, schema }) } /// Creates a new table from a schema, a vector of record batches, and a chunked geometry array. 
@@ -116,11 +116,11 @@ impl Table { /// vec![Arc::new(id_array)] /// ).unwrap(); /// - /// let table = Table::from_arrow_and_geometry(schema_ref, vec![batch], Arc::new(chunked_array)).unwrap(); + /// let table = Table::from_arrow_and_geometry(vec![batch], schema_ref, Arc::new(chunked_array)).unwrap(); /// ``` pub fn from_arrow_and_geometry( - schema: SchemaRef, batches: Vec, + schema: SchemaRef, geometry: Arc, ) -> Result { if batches.is_empty() { @@ -138,7 +138,7 @@ impl Table { new_batches.push(RecordBatch::try_new(new_schema.clone(), columns)?); } - Self::try_new(new_schema, new_batches) + Self::try_new(new_batches, new_schema) } /// Casts the geometry at `index` to a different data type @@ -269,7 +269,7 @@ impl Table { target_geo_data_type: Option, ) -> Result { if batches.is_empty() { - return Self::try_new(schema, batches); + return Self::try_new(batches, schema); } let num_batches = batches.len(); @@ -369,7 +369,7 @@ impl Table { new_record_batches.push(RecordBatch::try_new(new_schema.clone(), new_batch).unwrap()); } - Table::try_new(new_schema, new_record_batches) + Table::try_new(new_record_batches, new_schema) } /// Returns the length of this table. @@ -419,11 +419,11 @@ impl Table { /// /// let file = File::open("fixtures/roads.geojson").unwrap(); /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); - /// let (schema, record_batches) = table.into_inner(); + /// let (batches, schema) = table.into_inner(); /// # } /// ``` - pub fn into_inner(self) -> (SchemaRef, Vec) { - (self.schema, self.batches) + pub fn into_inner(self) -> (Vec, SchemaRef) { + (self.batches, self.schema) } /// Returns a reference to this table's schema. 
@@ -706,6 +706,6 @@ impl TryFrom> for Table { let batches = value .into_iter() .collect::, ArrowError>>()?; - Table::try_new(schema, batches) + Table::try_new(batches, schema) } } diff --git a/src/test/point.rs b/src/test/point.rs index f781987c..6bbb4e74 100644 --- a/src/test/point.rs +++ b/src/test/point.rs @@ -53,5 +53,5 @@ pub(crate) fn table() -> Table { ) .unwrap(); - Table::try_new(schema, vec![batch]).unwrap() + Table::try_new(vec![batch], schema).unwrap() }