From 9eaf839de3dc1b96c4b27df6b3430d784aee99ef Mon Sep 17 00:00:00 2001 From: Pete Gadomski <pete.gadomski@gmail.com> Date: Tue, 13 Aug 2024 16:09:03 -0600 Subject: [PATCH] Add some (but not all) docs for the Rust crate (#696) Includes: - A breaking change to `Table::from_arrow_and_geometry` to make the argument order the same as `Table::try_new` - Removal of a leftover `dbg!` statement in [src/array/coord/separated/array.rs](https://github.com/geoarrow/geoarrow-rs/commit/0ab9dc00918cb914067bda3b7251e7ca49047aee#diff-05c34ee401b527590650de1809d1edcc54957ce8a5c47128371bc83729ba2392) - Moved two single-file `mod-name/mod.rs` modules up a level (`table.rs` and `chunked_array.rs`) - Some README tweaks, including adding badges - `#![deny(missing_docs)]` to `lib.rs`, and then `#![allow(missing_docs)]` on the "deep" modules (e.g. `algorithm`, `array`) - Docs to the rest of the modules, with examples almost always NB I've updated https://github.com/geoarrow/geoarrow-rs/issues/689 with a checklist to track progress on the "deep" modules --------- Co-authored-by: Kyle Barron <kylebarron2@gmail.com> --- Cargo.toml | 1 + README.md | 9 +- js/README.md | 7 +- js/src/io/flatgeobuf.rs | 2 +- js/src/io/geojson.rs | 4 +- js/src/io/parquet/async.rs | 8 +- js/src/io/parquet/sync.rs | 4 +- python/README.md | 4 +- python/core/Cargo.lock | 5 - python/core/src/ffi/from_python/table.rs | 2 +- python/core/src/interop/util.rs | 4 +- python/core/src/io/parquet/reader.rs | 8 +- src/algorithm/mod.rs | 2 + src/algorithm/native/explode.rs | 2 +- src/array/coord/mod.rs | 3 + src/array/coord/separated/array.rs | 1 - src/array/metadata.rs | 5 + src/array/mod.rs | 2 + src/{chunked_array => }/chunked_array.rs | 471 ++++++++++++++++- src/chunked_array/mod.rs | 19 - src/datatypes.rs | 98 +++- src/error.rs | 17 + src/geo_traits/mod.rs | 2 + src/indexed/mod.rs | 6 +- src/io/gdal/reader.rs | 2 +- src/io/geozero/table/builder/table.rs | 2 +- src/io/ipc/reader.rs | 4 +- src/io/mod.rs | 2 + 
src/io/parquet/reader/async.rs | 2 +- src/io/parquet/reader/builder.rs | 2 +- src/io/stream.rs | 2 +- src/lib.rs | 56 ++- src/scalar/mod.rs | 2 + src/{table/mod.rs => table.rs} | 283 ++++++++++- src/test/point.rs | 2 +- src/trait_.rs | 610 +++++++++++++++++++++-- 36 files changed, 1506 insertions(+), 149 deletions(-) rename src/{chunked_array => }/chunked_array.rs (51%) delete mode 100644 src/chunked_array/mod.rs rename src/{table/mod.rs => table.rs} (63%) diff --git a/Cargo.toml b/Cargo.toml index 1ae271f6..a2af42ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -155,6 +155,7 @@ bench = false # TODO fix this benchmark required-features = ["parquet_compression"] [package.metadata.docs.rs] +rustdoc-args = ["--cfg", "docsrs"] features = [ "csv", "flatgeobuf", diff --git a/README.md b/README.md index 9ffc12d3..d420b307 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,9 @@ -# `geoarrow-rs` +# geoarrow-rs + +[![GitHub Workflow Status (CI)](https://img.shields.io/github/actions/workflow/status/geoarrow/geoarrow-rs/ci.yml?branch=main)](https://github.com/geoarrow/geoarrow-rs/actions/workflows/ci.yml) +[![docs.rs](https://img.shields.io/docsrs/geoarrow?label=docs.rs)](https://docs.rs/geoarrow/latest/geoarrow/) +[![Crates.io](https://img.shields.io/crates/v/geoarrow)](https://crates.io/crates/geoarrow) +![Crates.io](https://img.shields.io/crates/l/geoarrow) A Rust implementation of the [GeoArrow](https://github.com/geoarrow/geoarrow) specification and bindings to [GeoRust algorithms](https://github.com/georust/geo) for efficient spatial operations on GeoArrow memory. 
@@ -20,7 +25,7 @@ This repository also includes [Python bindings](https://github.com/geoarrow/geoa Add this to your `Cargo.toml`: ```toml -geoarrow = "0.1" +geoarrow = "0.2" ``` ## References diff --git a/js/README.md b/js/README.md index aa499e7e..159f8e8a 100644 --- a/js/README.md +++ b/js/README.md @@ -1,4 +1,6 @@ -# `geoarrow-wasm` +# geoarrow-wasm + +[![GitHub Workflow Status (WASM)](https://img.shields.io/github/actions/workflow/status/geoarrow/geoarrow-rs/wasm.yml?label=WASM&branch=main)](https://github.com/geoarrow/geoarrow-rs/actions/workflows/wasm.yml) Efficient, vectorized geospatial operations in WebAssembly. @@ -21,7 +23,6 @@ I wrote a [blog post](https://kylebarron.dev/blog/geos-wasm) about this that goe Most users will use this by installing the prebuilt JavaScript package. This is published to NPM as [`geoarrow-wasm`](https://npmjs.com/package/geoarrow-wasm). - ### From Rust Advanced users can also depend on these Rust-Wasm bindings directly, enabling you to add custom operations on top of these bindings and generating your own WebAssembly bundles. This means you can reuse all the binding between JavaScript and WebAssembly and focus on implementing your algorithms. This package is published to crates.io as [`geoarrow-wasm`](https://crates.io/crates/geoarrow-wasm). 
@@ -31,5 +32,3 @@ Advanced users can also depend on these Rust-Wasm bindings directly, enabling yo - [Prototyping GeoRust + GeoArrow in WebAssembly](https://observablehq.com/@kylebarron/prototyping-georust-geoarrow-in-webassembly) ## How it Works - - diff --git a/js/src/io/flatgeobuf.rs b/js/src/io/flatgeobuf.rs index 1ae43167..50c1f2d4 100644 --- a/js/src/io/flatgeobuf.rs +++ b/js/src/io/flatgeobuf.rs @@ -33,6 +33,6 @@ pub fn read_flatgeobuf(file: &[u8], batch_size: Option<usize>) -> WasmResult<Tab ..Default::default() }; let geo_table = _read_flatgeobuf(&mut cursor, options)?; - let (schema, batches) = geo_table.into_inner(); + let (batches, schema) = geo_table.into_inner(); Ok(Table::new(schema, batches)) } diff --git a/js/src/io/geojson.rs b/js/src/io/geojson.rs index 1c8e72ee..a55aada6 100644 --- a/js/src/io/geojson.rs +++ b/js/src/io/geojson.rs @@ -29,7 +29,7 @@ pub fn read_geojson(file: &[u8], batch_size: Option<usize>) -> WasmResult<Table> // assert_parquet_file_not_empty(parquet_file)?; let mut cursor = Cursor::new(file); let geo_table = _read_geojson(&mut cursor, batch_size)?; - let (schema, batches) = geo_table.into_inner(); + let (batches, schema) = geo_table.into_inner(); Ok(Table::new(schema, batches)) } @@ -39,7 +39,7 @@ pub fn read_geojson(file: &[u8], batch_size: Option<usize>) -> WasmResult<Table> #[wasm_bindgen(js_name = writeGeoJSON)] pub fn write_geojson(table: Table) -> WasmResult<Vec<u8>> { let (schema, batches) = table.into_inner(); - let rust_table = geoarrow::table::Table::try_new(schema, batches)?; + let rust_table = geoarrow::table::Table::try_new(batches, schema)?; let mut output_file: Vec<u8> = vec![]; _write_geojson(rust_table, &mut output_file)?; Ok(output_file) diff --git a/js/src/io/parquet/async.rs b/js/src/io/parquet/async.rs index 2651ce44..e9d6e82e 100644 --- a/js/src/io/parquet/async.rs +++ b/js/src/io/parquet/async.rs @@ -135,7 +135,7 @@ impl ParquetFile { ) .build()?; let table = stream.read_table().await?; - let (schema, 
batches) = table.into_inner(); + let (batches, schema) = table.into_inner(); Ok(Table::new(schema, batches)) } #[wasm_bindgen] @@ -267,11 +267,11 @@ impl ParquetDataset { let mut all_batches = vec![]; tables.into_iter().for_each(|table| { - let (_schema, table_batches) = table.into_inner(); + let (table_batches, _schema) = table.into_inner(); all_batches.extend(table_batches); }); - let table = geoarrow::table::Table::try_new(output_schema, all_batches)?; - let (schema, batches) = table.into_inner(); + let table = geoarrow::table::Table::try_new(all_batches, output_schema)?; + let (batches, schema) = table.into_inner(); Ok(Table::new(schema, batches)) } diff --git a/js/src/io/parquet/sync.rs b/js/src/io/parquet/sync.rs index ffeb0e5d..e3c7238c 100644 --- a/js/src/io/parquet/sync.rs +++ b/js/src/io/parquet/sync.rs @@ -37,7 +37,7 @@ pub fn read_geoparquet(file: Vec<u8>) -> WasmResult<Table> { )? .build()?; let geo_table = reader.read_table()?; - let (schema, batches) = geo_table.into_inner(); + let (batches, schema) = geo_table.into_inner(); Ok(Table::new(schema, batches)) } @@ -47,7 +47,7 @@ pub fn read_geoparquet(file: Vec<u8>) -> WasmResult<Table> { #[wasm_bindgen(js_name = writeGeoParquet)] pub fn write_geoparquet(table: Table) -> WasmResult<Vec<u8>> { let (schema, batches) = table.into_inner(); - let mut rust_table = geoarrow::table::Table::try_new(schema, batches)?; + let mut rust_table = geoarrow::table::Table::try_new(batches, schema)?; let mut output_file: Vec<u8> = vec![]; _write_geoparquet(&mut rust_table, &mut output_file, &Default::default())?; Ok(output_file) diff --git a/python/README.md b/python/README.md index 6fdb3430..9eacb6f5 100644 --- a/python/README.md +++ b/python/README.md @@ -1,4 +1,6 @@ -# `geoarrow.rust`: Python bindings to `geoarrow-rs` +# geoarrow.rust + +[![GitHub Workflow Status 
(Python)](https://img.shields.io/github/actions/workflow/status/geoarrow/geoarrow-rs/python.yml?branch=main)](https://github.com/geoarrow/geoarrow-rs/actions/workflows/python.yml) This folder contains Python bindings to the [GeoArrow Rust implementation](https://github.com/geoarrow/geoarrow-rs). diff --git a/python/core/Cargo.lock b/python/core/Cargo.lock index 4c8e1f7c..71d12f64 100644 --- a/python/core/Cargo.lock +++ b/python/core/Cargo.lock @@ -1024,18 +1024,15 @@ dependencies = [ name = "geoarrow" version = "0.3.0-alpha.1" dependencies = [ - "anyhow", "arrow", "arrow-array", "arrow-buffer", "arrow-cast", "arrow-data", "arrow-ipc", - "arrow-json", "arrow-schema", "async-stream", "async-trait", - "bumpalo", "byteorder", "bytes", "chrono", @@ -1043,12 +1040,10 @@ dependencies = [ "futures", "geo", "geo-index", - "geojson", "geozero", "half", "http-range-client", "indexmap", - "itertools 0.13.0", "lexical-core", "num_enum", "object_store", diff --git a/python/core/src/ffi/from_python/table.rs b/python/core/src/ffi/from_python/table.rs index 9e0abd73..94a85a51 100644 --- a/python/core/src/ffi/from_python/table.rs +++ b/python/core/src/ffi/from_python/table.rs @@ -20,7 +20,7 @@ impl<'a> FromPyObject<'a> for GeoTable { batches.push(batch); } - let table = geoarrow::table::Table::try_new(schema, batches) + let table = geoarrow::table::Table::try_new(batches, schema) .map_err(|e| PyValueError::new_err(e.to_string()))?; let table = table .downcast(true) diff --git a/python/core/src/interop/util.rs b/python/core/src/interop/util.rs index 01c92f68..9449d92a 100644 --- a/python/core/src/interop/util.rs +++ b/python/core/src/interop/util.rs @@ -50,11 +50,11 @@ pub(crate) fn import_pyogrio(py: Python) -> PyGeoArrowResult<Bound<PyModule>> { } pub(crate) fn table_to_pytable(table: geoarrow::table::Table) -> PyTable { - let (schema, batches) = table.into_inner(); + let (batches, schema) = table.into_inner(); PyTable::new(batches, schema) } pub(crate) fn pytable_to_table(table: 
PyTable) -> Result<geoarrow::table::Table, GeoArrowError> { let (batches, schema) = table.into_inner(); - geoarrow::table::Table::try_new(schema, batches) + geoarrow::table::Table::try_new(batches, schema) } diff --git a/python/core/src/io/parquet/reader.rs b/python/core/src/io/parquet/reader.rs index 972641d0..3d7b73c0 100644 --- a/python/core/src/io/parquet/reader.rs +++ b/python/core/src/io/parquet/reader.rs @@ -574,10 +574,10 @@ impl ParquetDataset { let mut all_batches = vec![]; tables.into_iter().for_each(|table| { - let (_schema, table_batches) = table.into_inner(); + let (table_batches, _schema) = table.into_inner(); all_batches.extend(table_batches); }); - let table = Table::try_new(output_schema, all_batches) + let table = Table::try_new(all_batches, output_schema) .map_err(PyGeoArrowError::GeoArrowError)?; Ok(table_to_pytable(table)) })?; @@ -609,10 +609,10 @@ impl ParquetDataset { let mut all_batches = vec![]; tables.into_iter().for_each(|table| { - let (_schema, table_batches) = table.into_inner(); + let (table_batches, _schema) = table.into_inner(); all_batches.extend(table_batches); }); - let table = Table::try_new(output_schema, all_batches) + let table = Table::try_new(all_batches, output_schema) .map_err(PyGeoArrowError::GeoArrowError)?; Ok(table_to_pytable(table).to_arro3(py)?) }) diff --git a/src/algorithm/mod.rs b/src/algorithm/mod.rs index f170d362..f7acc616 100644 --- a/src/algorithm/mod.rs +++ b/src/algorithm/mod.rs @@ -1,5 +1,7 @@ //! Vectorized algorithms implemented on and returning GeoArrow arrays. 
+#![allow(missing_docs)] // FIXME + pub mod broadcasting; pub mod geo; pub mod geo_index; diff --git a/src/algorithm/native/explode.rs b/src/algorithm/native/explode.rs index c7a03a9c..1b43fa86 100644 --- a/src/algorithm/native/explode.rs +++ b/src/algorithm/native/explode.rs @@ -286,7 +286,7 @@ impl ExplodeTable for Table { schema_builder.push(field.clone()); let schema = schema_builder.finish(); - Table::try_new(schema.into(), new_batches) + Table::try_new(new_batches, schema.into()) } else { // No take is necessary; nothing happens Ok(self.clone()) diff --git a/src/array/coord/mod.rs b/src/array/coord/mod.rs index 78cce7d8..5a96375d 100644 --- a/src/array/coord/mod.rs +++ b/src/array/coord/mod.rs @@ -19,7 +19,10 @@ pub use separated::{SeparatedCoordBuffer, SeparatedCoordBufferBuilder}; /// buffers as XXXX and YYYY. #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum CoordType { + /// Interleaved coordinates. #[default] Interleaved, + + /// Separated coordinates. Separated, } diff --git a/src/array/coord/separated/array.rs b/src/array/coord/separated/array.rs index 7d76b610..c1980033 100644 --- a/src/array/coord/separated/array.rs +++ b/src/array/coord/separated/array.rs @@ -20,7 +20,6 @@ pub struct SeparatedCoordBuffer<const D: usize> { } fn check<const D: usize>(buffers: &[ScalarBuffer<f64>; D]) -> Result<()> { - dbg!(buffers); if !buffers.windows(2).all(|w| w[0].len() == w[1].len()) { return Err(GeoArrowError::General( "all buffers must have the same length".to_string(), diff --git a/src/array/metadata.rs b/src/array/metadata.rs index 9136518a..56cb3764 100644 --- a/src/array/metadata.rs +++ b/src/array/metadata.rs @@ -12,6 +12,11 @@ use crate::error::GeoArrowError; /// this value is omitted, edges will be interpreted as planar. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub enum Edges { + /// Follow a spherical path rather than a planar. 
+ /// + /// See [the geoarrow + /// specification](https://github.com/geoarrow/geoarrow/blob/main/extension-types.md#extension-metadata) + /// for more information about how `edges` should be used. #[serde(rename = "spherical")] Spherical, } diff --git a/src/array/mod.rs b/src/array/mod.rs index e87666a0..ed59ffd9 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -1,5 +1,7 @@ //! Implementations of immutable GeoArrow arrays plus builders to more easily create arrays. +#![allow(missing_docs)] // FIXME + pub use binary::{WKBArray, WKBBuilder, WKBCapacity}; pub use cast::{AsChunkedGeometryArray, AsGeometryArray}; pub use coord::{ diff --git a/src/chunked_array/chunked_array.rs b/src/chunked_array.rs similarity index 51% rename from src/chunked_array/chunked_array.rs rename to src/chunked_array.rs index 4c5d5722..71095d4d 100644 --- a/src/chunked_array/chunked_array.rs +++ b/src/chunked_array.rs @@ -1,3 +1,12 @@ +//! Contains implementations of _chunked_ GeoArrow arrays. +//! +//! In contrast to the structures in [array](crate::array), these data structures only have contiguous +//! memory within each individual _chunk_. These chunked arrays are essentially wrappers around a +//! [Vec] of geometry arrays. +//! +//! Additionally, if the `rayon` feature is active, operations on chunked arrays will automatically +//! be parallelized across each chunk. + use std::any::Any; use std::collections::HashSet; use std::sync::Arc; @@ -25,6 +34,18 @@ pub struct ChunkedArray<A: Array> { } impl<A: Array> ChunkedArray<A> { + /// Creates a new chunked array from multiple arrays. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// let array_0 = Int32Array::from(vec![1, 2]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// ``` pub fn new(chunks: Vec<A>) -> Self { let mut length = 0; chunks.iter().for_each(|x| length += x.len()); @@ -39,32 +60,130 @@ impl<A: Array> ChunkedArray<A> { Self { chunks, length } } + /// Converts this chunked array into its inner chunks. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// let array_0 = Int32Array::from(vec![1, 2]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// let chunks = chunked_array.into_inner(); + /// assert_eq!(chunks.len(), 2); + /// ``` pub fn into_inner(self) -> Vec<A> { self.chunks } + /// Returns this chunked array's length. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// let array_0 = Int32Array::from(vec![1, 2]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// assert_eq!(chunked_array.len(), 4); + /// ``` pub fn len(&self) -> usize { self.length } + /// Returns true if chunked array is empty. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// assert!(ChunkedArray::<Int32Array>::new(Vec::new()).is_empty()); + /// + /// let array_0 = Int32Array::from(vec![1, 2]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// assert!(!chunked_array.is_empty()); + /// ``` pub fn is_empty(&self) -> bool { self.len() == 0 } + /// Returns this chunked array's data type. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// use arrow_schema::DataType; + /// + /// let array_0 = Int32Array::from(vec![1, 2]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// assert_eq!(chunked_array.data_type(), &DataType::Int32); + /// ``` pub fn data_type(&self) -> &DataType { self.chunks.first().unwrap().data_type() } + /// Returns the number of nulls in this chunked array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// let array_0 = Int32Array::from(vec![1, 2]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// assert_eq!(chunked_array.null_count(), 0); + /// ``` pub fn null_count(&self) -> usize { self.chunks() .iter() .fold(0, |acc, chunk| acc + chunk.null_count()) } + /// Returns an immutable reference to this chunked array's chunks. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// let array_0 = Int32Array::from(vec![1, 2]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// let chunks = chunked_array.chunks(); + /// ``` pub fn chunks(&self) -> &[A] { self.chunks.as_slice() } + /// Applies an operation over each chunk of this chunked array. + /// + /// If the `rayon` feature is enabled, this will be done in parallel. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// let array_0 = Int32Array::from(vec![1]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// let lengths = chunked_array.map(|chunk| chunk.len()); + /// assert_eq!(lengths, vec![1, 2]); + /// ``` #[allow(dead_code)] pub fn map<F: Fn(&A) -> R + Sync + Send, R: Send>(&self, map_op: F) -> Vec<R> { #[cfg(feature = "rayon")] @@ -82,7 +201,22 @@ impl<A: Array> ChunkedArray<A> { self.chunks.iter().map(map_op).collect() } } - + /// Applies an operation over each chunk of this chunked array, returning a `Result`. + /// + /// If the `rayon` feature is enabled, this will be done in parallel. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::chunked_array::ChunkedArray; + /// use arrow_array::Int32Array; + /// + /// let array_0 = Int32Array::from(vec![1]); + /// let array_1 = Int32Array::from(vec![3, 4]); + /// let chunked_array = ChunkedArray::new(vec![array_0, array_1]); + /// let lengths = chunked_array.try_map(|chunk| Ok(chunk.len())).unwrap(); + /// assert_eq!(lengths, vec![1, 2]); + /// ``` pub fn try_map<F: Fn(&A) -> Result<R> + Sync + Send, R: Send>( &self, map_op: F, @@ -118,9 +252,9 @@ impl<A: Array> AsRef<[A]> for ChunkedArray<A> { /// This can be thought of as a geometry column in a table, as Table objects normally have internal /// batches. /// -/// ## Invariants: +/// # Invariants /// -/// - Must have at least one chunk +/// Must have at least one chunk. #[derive(Debug, Clone, PartialEq)] pub struct ChunkedGeometryArray<G: GeometryArrayTrait> { pub(crate) chunks: Vec<G>, @@ -128,6 +262,17 @@ pub struct ChunkedGeometryArray<G: GeometryArrayTrait> { } impl<G: GeometryArrayTrait> ChunkedGeometryArray<G> { + /// Creates a new chunked geometry array from multiple arrays. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// ``` pub fn new(chunks: Vec<G>) -> Self { // TODO: assert all equal extension fields let mut length = 0; @@ -135,32 +280,125 @@ impl<G: GeometryArrayTrait> ChunkedGeometryArray<G> { Self { chunks, length } } - // TODO: check/assert on creation that all are the same so we can be comfortable here only - // taking the first. + /// Returns the extension field for this chunked geometry array. + /// + /// TODO: check/assert on creation that all are the same so we can be comfortable here only + /// taking the first. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let field = chunked_array.extension_field(); + /// assert_eq!(field.name(), "geometry"); + /// ``` pub fn extension_field(&self) -> Arc<Field> { self.chunks.first().unwrap().extension_field() } + /// Converts this chunked geometry array into its inner chunks. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let chunks = chunked_array.into_inner(); + /// assert_eq!(chunks.len(), 2); + /// ``` pub fn into_inner(self) -> Vec<G> { self.chunks } + /// Returns this chunked geometry array length. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.), &geo::point!(x: 5., y: 6.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// assert_eq!(chunked_array.len(), 3); + /// ``` pub fn len(&self) -> usize { self.length } + /// Returns true if this chunked geometry array is empty. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// assert!(!chunked_array.is_empty()); + /// ``` pub fn is_empty(&self) -> bool { self.len() == 0 } + /// Returns an immutable reference to this array's chunks. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let chunks = chunked_array.chunks(); + /// ``` pub fn chunks(&self) -> &[G] { self.chunks.as_slice() } + /// Returns this array's geo data type. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray, datatypes::GeoDataType}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// assert!(matches!(chunked_array.data_type(), GeoDataType::Point(_, _))); + /// ``` pub fn data_type(&self) -> GeoDataType { self.chunks.first().unwrap().data_type() } + /// Converts this chunked array into a vector, where each element is the output of `map_op` for one chunk. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::ChunkedGeometryArray, + /// array::PointArray, + /// trait_::GeometryArrayTrait, + /// datatypes::GeoDataType, + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let lengths = chunked_array.into_map(|chunk| chunk.len()); // chunked_array is consumed + /// assert_eq!(lengths, vec![1, 1]); + /// ``` pub fn into_map<F: Fn(G) -> R + Sync + Send, R: Send>(self, map_op: F) -> Vec<R> { #[cfg(feature = "rayon")] { @@ -178,6 +416,24 @@ impl<G: GeometryArrayTrait> ChunkedGeometryArray<G> { } } + /// Maps this chunked array into a vector, where each element is the output of `map_op` for one chunk. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::ChunkedGeometryArray, + /// array::PointArray, + /// trait_::GeometryArrayTrait, + /// datatypes::GeoDataType, + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let lengths = chunked_array.map(|chunk| chunk.len()); + /// assert_eq!(lengths, vec![1, 1]); + /// ``` pub fn map<F: Fn(&G) -> R + Sync + Send, R: Send>(&self, map_op: F) -> Vec<R> { #[cfg(feature = "rayon")] { @@ -195,6 +451,24 @@ impl<G: GeometryArrayTrait> ChunkedGeometryArray<G> { } } + /// Maps this chunked array into a vector, where each element is the `Result` output of `map_op` for one chunk. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::ChunkedGeometryArray, + /// array::PointArray, + /// trait_::GeometryArrayTrait, + /// datatypes::GeoDataType, + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let lengths = chunked_array.try_map(|chunk| Ok(chunk.len())).unwrap(); + /// assert_eq!(lengths, vec![1, 1]); + /// ``` pub fn try_map<F: Fn(&G) -> Result<R> + Sync + Send, R: Send>( &self, map_op: F, @@ -212,6 +486,22 @@ impl<G: GeometryArrayTrait> ChunkedGeometryArray<G> { } impl<'a, G: GeometryArrayTrait + GeometryArrayAccessor<'a>> ChunkedGeometryArray<G> { + /// Returns a value from this chunked array, ignoring validity. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let value = chunked_array.value(1); // geoarrow::scalar::Point<2> + /// ``` + /// + /// # Panics + /// + /// Panics if the index exceeds the size of this chunked array. pub fn value(&'a self, index: usize) -> G::Item { assert!(index <= self.len()); let mut index = index; @@ -225,6 +515,22 @@ impl<'a, G: GeometryArrayTrait + GeometryArrayAccessor<'a>> ChunkedGeometryArray unreachable!() } + /// Returns a value from this chunked array, considering validity. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{chunked_array::ChunkedGeometryArray, array::PointArray}; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let value = chunked_array.get(1).unwrap(); // geoarrow::scalar::Point<2> + /// ``` + /// + /// # Panics + /// + /// Panics if the index exceeds the size of this chunked array. pub fn get(&'a self, index: usize) -> Option<G::Item> { assert!(index <= self.len()); let mut index = index; @@ -247,20 +553,31 @@ impl<G: GeometryArrayTrait> TryFrom<Vec<G>> for ChunkedGeometryArray<G> { } } +/// A chunked point array. pub type ChunkedPointArray<const D: usize> = ChunkedGeometryArray<PointArray<D>>; +/// A chunked line string array. pub type ChunkedLineStringArray<O, const D: usize> = ChunkedGeometryArray<LineStringArray<O, D>>; +/// A chunked polygon array. pub type ChunkedPolygonArray<O, const D: usize> = ChunkedGeometryArray<PolygonArray<O, D>>; +/// A chunked multi-point array. pub type ChunkedMultiPointArray<O, const D: usize> = ChunkedGeometryArray<MultiPointArray<O, D>>; +/// A chunked multi-line string array. pub type ChunkedMultiLineStringArray<O, const D: usize> = ChunkedGeometryArray<MultiLineStringArray<O, D>>; +/// A chunked multi-polygon array. pub type ChunkedMultiPolygonArray<O, const D: usize> = ChunkedGeometryArray<MultiPolygonArray<O, D>>; +/// A chunked mixed geometry array. pub type ChunkedMixedGeometryArray<O, const D: usize> = ChunkedGeometryArray<MixedGeometryArray<O, D>>; +/// A chunked geometry collection array. pub type ChunkedGeometryCollectionArray<O, const D: usize> = ChunkedGeometryArray<GeometryCollectionArray<O, D>>; +/// A chunked WKB array. pub type ChunkedWKBArray<O> = ChunkedGeometryArray<WKBArray<O>>; +/// A chunked rect array. 
pub type ChunkedRectArray<const D: usize> = ChunkedGeometryArray<RectArray<D>>; +/// A chunked unknown geometry array. #[allow(dead_code)] pub type ChunkedUnknownGeometryArray = ChunkedGeometryArray<Arc<dyn GeometryArrayTrait>>; @@ -271,26 +588,128 @@ pub type ChunkedUnknownGeometryArray = ChunkedGeometryArray<Arc<dyn GeometryArra /// strongly-typed chunked array, use `as_any` with the `data_type` method to discern which chunked /// array type to pass to `downcast_ref`. pub trait ChunkedGeometryArrayTrait: std::fmt::Debug + Send + Sync { - /// Returns the array as [`Any`] so that it can be - /// downcasted to a specific implementation. + /// Returns the array as [`Any`] so that it can be downcasted to a specific implementation. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::{ChunkedGeometryArray, ChunkedGeometryArrayTrait}, + /// array::PointArray + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let any = chunked_array.as_any(); + /// ``` fn as_any(&self) -> &dyn Any; /// Returns a reference to the [`GeoDataType`] of this array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::{ChunkedGeometryArray, ChunkedGeometryArrayTrait}, + /// array::PointArray + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let data_type = chunked_array.data_type(); + /// ``` fn data_type(&self) -> GeoDataType; - /// Returns an Arrow [`Field`] describing this chunked array. 
This field will always have the - /// `ARROW:extension:name` key of the field metadata set, signifying that it describes a - /// GeoArrow extension type. + /// Returns an Arrow [`Field`] describing this chunked array. + /// + /// This field will always have the `ARROW:extension:name` key of the field + /// metadata set, signifying that it describes a GeoArrow extension type. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::{ChunkedGeometryArray, ChunkedGeometryArrayTrait}, + /// array::PointArray + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let field = chunked_array.extension_field(); + /// assert_eq!(field.metadata()["ARROW:extension:name"], "geoarrow.point"); + /// ``` fn extension_field(&self) -> Arc<Field>; - /// Access the geometry chunks contained within this chunked array. + /// Returns a vector of references to the geometry chunks contained within this chunked array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::{ChunkedGeometryArray, ChunkedGeometryArrayTrait}, + /// array::PointArray + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let chunks = chunked_array.geometry_chunks(); + /// assert_eq!(chunks.len(), 2); + /// ``` fn geometry_chunks(&self) -> Vec<&dyn GeometryArrayTrait>; - /// The number of chunks in this chunked array. + /// Returns the number of chunks in this chunked array. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::{ChunkedGeometryArray, ChunkedGeometryArrayTrait}, + /// array::PointArray + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// assert_eq!(chunked_array.num_chunks(), 2); + /// ``` fn num_chunks(&self) -> usize; + /// Returns a reference to this chunked geometry array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::{ChunkedGeometryArray, ChunkedGeometryArrayTrait}, + /// array::PointArray + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let array_ref = chunked_array.as_ref(); + /// ``` fn as_ref(&self) -> &dyn ChunkedGeometryArrayTrait; + /// Returns a vector of references to the underlying arrow arrays. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// chunked_array::{ChunkedGeometryArray, ChunkedGeometryArrayTrait}, + /// array::PointArray + /// }; + /// + /// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); + /// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); + /// let arrays = chunked_array.array_refs(); + /// ``` fn array_refs(&self) -> Vec<Arc<dyn Array>>; } @@ -446,8 +865,22 @@ impl<const D: usize> ChunkedGeometryArrayTrait for ChunkedRectArray<D> { } } -/// Construct +/// Constructs a chunked geometry array from arrow chunks. +/// /// Does **not** parse WKB. Will return a ChunkedWKBArray for WKB input. 
+/// +/// # Examples +/// +/// ``` +/// use geoarrow::{GeometryArrayTrait, array::PointArray}; +/// use std::sync::Arc; +/// +/// let array: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); +/// let field = array.extension_field(); +/// let array = array.into_array_ref(); +/// let chunks = vec![array.as_ref()]; +/// let chunked_array = geoarrow::chunked_array::from_arrow_chunks(chunks.as_slice(), &field).unwrap(); +/// ``` pub fn from_arrow_chunks( chunks: &[&dyn Array], field: &Field, @@ -515,6 +948,18 @@ pub fn from_arrow_chunks( } } +/// Creates a chunked geometry array from geoarrow chunks. +/// +/// # Examples +/// +/// ``` +/// use geoarrow::{GeometryArrayTrait, array::PointArray}; +/// +/// let array_0: PointArray<2> = vec![&geo::point!(x: 1., y: 2.)].as_slice().into(); +/// let array_1: PointArray<2> = vec![&geo::point!(x: 3., y: 4.)].as_slice().into(); +/// let chunks = vec![array_0.as_ref(), array_1.as_ref()]; +/// let chunked_array = geoarrow::chunked_array::from_geoarrow_chunks(chunks.as_slice()).unwrap(); +/// ``` pub fn from_geoarrow_chunks( chunks: &[&dyn GeometryArrayTrait], ) -> Result<Arc<dyn ChunkedGeometryArrayTrait>> { diff --git a/src/chunked_array/mod.rs b/src/chunked_array/mod.rs deleted file mode 100644 index 8100e2a4..00000000 --- a/src/chunked_array/mod.rs +++ /dev/null @@ -1,19 +0,0 @@ -//! Contains implementations of _chunked_ GeoArrow arrays. -//! -//! In contrast to the structures in [array](../array), these data structures only have contiguous -//! memory within each individual _chunk_. These chunked arrays are essentially wrappers around a -//! `Vec` of geometry arrays. -//! -//! Additionally, if the `rayon` feature is active, operations on chunked arrays will automatically -//! be parallelized across each chunk. 
- -#[allow(clippy::module_inception)] -mod chunked_array; - -pub use chunked_array::{ - from_arrow_chunks, from_geoarrow_chunks, ChunkedArray, ChunkedGeometryArray, - ChunkedGeometryArrayTrait, ChunkedGeometryCollectionArray, ChunkedLineStringArray, - ChunkedMixedGeometryArray, ChunkedMultiLineStringArray, ChunkedMultiPointArray, - ChunkedMultiPolygonArray, ChunkedPointArray, ChunkedPolygonArray, ChunkedRectArray, - ChunkedWKBArray, -}; diff --git a/src/datatypes.rs b/src/datatypes.rs index 5416570f..425e1bf1 100644 --- a/src/datatypes.rs +++ b/src/datatypes.rs @@ -11,14 +11,35 @@ use crate::array::metadata::ArrayMetadata; use crate::array::CoordType; use crate::error::{GeoArrowError, Result}; -/// The dimension of the geometry array +/// The dimension of the geometry array. +/// +/// [Dimension] implements [TryFrom] for integers: +/// +/// ``` +/// use geoarrow::datatypes::Dimension; +/// +/// assert_eq!(Dimension::try_from(2).unwrap(), Dimension::XY); +/// ``` #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Dimension { + /// Two-dimensional. XY, + + /// Three-dimensional. XYZ, } impl Dimension { + /// Returns the size of this dimension. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::datatypes::Dimension; + /// + /// assert_eq!(Dimension::XY.size(), 2); + /// assert_eq!(Dimension::XYZ.size(), 3); + /// ``` pub fn size(&self) -> usize { match self { Dimension::XY => 2, @@ -49,9 +70,9 @@ impl TryFrom<i32> for Dimension { } } -/// The geometry type is designed to aid in downcasting from dynamically-typed geometry arrays by -/// uniquely identifying the physical buffer layout of each geometry array type. +/// The geodata type is designed to aid in downcasting from dynamically-typed geometry arrays. /// +/// The geodata type uniquely identifies the physical buffer layout of each geometry array type. 
/// It must always be possible to accurately downcast from a `dyn &GeometryArrayTrait` or `dyn /// &ChunkedGeometryArrayTrait` to a unique concrete array type using this enum. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -331,10 +352,20 @@ fn rect_data_type(dim: Dimension) -> DataType { } impl GeoDataType { - /// Convert a [`GeoDataType`] into the relevant arrow [`DataType`]. + /// Converts a [`GeoDataType`] into the relevant arrow [`DataType`]. /// /// Note that an arrow [`DataType`] will lose the accompanying GeoArrow metadata if it is not /// part of a [`Field`] with GeoArrow extension metadata in its field metadata. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::CoordType, datatypes::{GeoDataType, Dimension}}; + /// use arrow_schema::DataType; + /// + /// let data_type = GeoDataType::Point(CoordType::Interleaved, Dimension::XY).to_data_type(); + /// assert!(matches!(data_type, DataType::FixedSizeList(_, _))); + /// ``` pub fn to_data_type(&self) -> DataType { use GeoDataType::*; match self { @@ -367,7 +398,16 @@ impl GeoDataType { } } - /// Get the GeoArrow extension name pertaining to this data type. + /// Returns the GeoArrow extension name pertaining to this data type. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::datatypes::GeoDataType; + /// + /// let geo_data_type = GeoDataType::Point(Default::default(), 2.try_into().unwrap()); + /// assert_eq!(geo_data_type.extension_name(), "geoarrow.point") + /// ``` pub fn extension_name(&self) -> &'static str { use GeoDataType::*; match self { @@ -386,8 +426,20 @@ impl GeoDataType { } } - /// Convert this [`GeoDataType`] into an arrow [`Field`], maintaining GeoArrow extension + /// Converts this [`GeoDataType`] into an arrow [`Field`], maintaining GeoArrow extension /// metadata. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::datatypes::GeoDataType; + /// + /// let geo_data_type = GeoDataType::Point(Default::default(), 2.try_into().unwrap()); + /// let field = geo_data_type.to_field("geometry", false); + /// assert_eq!(field.name(), "geometry"); + /// assert!(!field.is_nullable()); + /// assert_eq!(field.metadata()["ARROW:extension:name"], "geoarrow.point"); + /// ``` pub fn to_field<N: Into<String>>(&self, name: N, nullable: bool) -> Field { let extension_name = self.extension_name(); let mut metadata = HashMap::with_capacity(1); @@ -398,6 +450,20 @@ impl GeoDataType { Field::new(name, self.to_data_type(), nullable).with_metadata(metadata) } + /// Converts this geo-data type to a field with the additional [ArrayMetadata]. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::metadata::{ArrayMetadata, Edges}, datatypes::GeoDataType}; + /// + /// let geo_data_type = GeoDataType::Point(Default::default(), 2.try_into().unwrap()); + /// let metadata = ArrayMetadata { + /// crs: None, + /// edges: Some(Edges::Spherical), + /// }; + /// let field = geo_data_type.to_field_with_metadata("geometry", false, &metadata); + /// ``` pub fn to_field_with_metadata<N: Into<String>>( &self, name: N, @@ -417,6 +483,16 @@ impl GeoDataType { Field::new(name, self.to_data_type(), nullable).with_metadata(metadata) } + /// Returns this geodata type with the provided [CoordType]. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::CoordType, datatypes::GeoDataType}; + /// + /// let geo_data_type = GeoDataType::Point(CoordType::Interleaved, 2.try_into().unwrap()); + /// let separated_geo_data_type = geo_data_type.with_coord_type(CoordType::Separated); + /// ``` pub fn with_coord_type(self, coord_type: CoordType) -> GeoDataType { use GeoDataType::*; match self { @@ -441,6 +517,16 @@ impl GeoDataType { } } + /// Returns this geodata type with the provided [Dimension]. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::datatypes::GeoDataType; + /// + /// let geo_data_type = GeoDataType::Point(Default::default(), 2.try_into().unwrap()); + /// let geo_data_type_3d = geo_data_type.with_dimension(3.try_into().unwrap()); + /// ``` pub fn with_dimension(self, dim: Dimension) -> GeoDataType { use GeoDataType::*; match self { diff --git a/src/error.rs b/src/error.rs index 0a07483a..bc4cea89 100644 --- a/src/error.rs +++ b/src/error.rs @@ -9,6 +9,7 @@ use thiserror::Error; #[derive(Error, Debug)] #[non_exhaustive] pub enum GeoArrowError { + /// Incorrect type was passed to an operation. #[error("Incorrect type passed to operation: {0}")] IncorrectType(Cow<'static, str>), @@ -16,61 +17,77 @@ pub enum GeoArrowError { #[error("Not yet implemented: {0}")] NotYetImplemented(String), + /// General error. #[error("General error: {0}")] General(String), /// Whenever pushing to a container fails because it does not support more entries. + /// /// The solution is usually to use a higher-capacity container-backing type. 
#[error("Overflow")] Overflow, + /// [ArrowError] #[error(transparent)] Arrow(#[from] ArrowError), + /// [geo::vincenty_distance::FailedToConvergeError] #[error(transparent)] FailedToConvergeError(#[from] geo::vincenty_distance::FailedToConvergeError), + /// [gdal::errors::GdalError] #[cfg(feature = "gdal")] #[error(transparent)] GdalError(#[from] gdal::errors::GdalError), + /// [geozero::error::GeozeroError] #[cfg(feature = "geozero")] #[error(transparent)] GeozeroError(#[from] geozero::error::GeozeroError), + /// [geos::Error] #[cfg(feature = "geos")] #[error(transparent)] GeosError(#[from] geos::Error), + /// [object_store::Error] #[cfg(feature = "flatgeobuf_async")] #[error(transparent)] ObjectStoreError(#[from] object_store::Error), + /// [parquet::errors::ParquetError] #[cfg(feature = "parquet")] #[error(transparent)] ParquetError(#[from] parquet::errors::ParquetError), + /// [polylabel::errors::PolylabelError] #[cfg(feature = "polylabel")] #[error(transparent)] PolylabelError(#[from] polylabel::errors::PolylabelError), + /// [proj::ProjError] #[cfg(feature = "proj")] #[error(transparent)] ProjError(#[from] proj::ProjError), + /// [flatgeobuf::Error] #[cfg(feature = "flatgeobuf")] #[error(transparent)] FlatgeobufError(#[from] flatgeobuf::Error), + /// [std::io::Error] #[error(transparent)] IOError(#[from] std::io::Error), + /// [serde_json::Error] #[error(transparent)] SerdeJsonError(#[from] serde_json::Error), + /// [sqlx::Error] #[cfg(feature = "postgis")] #[error(transparent)] SqlxError(#[from] sqlx::Error), } +/// Crate-specific result type. pub type Result<T> = std::result::Result<T, GeoArrowError>; diff --git a/src/geo_traits/mod.rs b/src/geo_traits/mod.rs index d2105fab..1c8429a6 100644 --- a/src/geo_traits/mod.rs +++ b/src/geo_traits/mod.rs @@ -8,6 +8,8 @@ //! [here](https://github.com/georust/geo/pull/1019)) but that is vendored into this repository for //! use internally, such as in the WKB parser. 
+#![allow(missing_docs)] // FIXME + pub use coord::CoordTrait; pub use geometry::{GeometryTrait, GeometryType}; pub use geometry_collection::GeometryCollectionTrait; diff --git a/src/indexed/mod.rs b/src/indexed/mod.rs index 4c8170a4..0af9abcb 100644 --- a/src/indexed/mod.rs +++ b/src/indexed/mod.rs @@ -1,5 +1,7 @@ -//! Indexed geometry arrays: arrays which are associated with a spatial index for efficient boolean -//! operations. +//! Indexed geometry arrays are associated with a spatial index for efficient +//! boolean operations. + +#![allow(missing_docs)] // FIXME pub mod array; pub mod chunked; diff --git a/src/io/gdal/reader.rs b/src/io/gdal/reader.rs index 1c14234c..fb6df890 100644 --- a/src/io/gdal/reader.rs +++ b/src/io/gdal/reader.rs @@ -40,7 +40,7 @@ pub fn read_gdal(layer: &mut Layer, batch_size: Option<usize>) -> Result<Table> .into_iter() .collect::<std::result::Result<Vec<RecordBatch>, ArrowError>>()?; - Table::try_new(schema, batches) + Table::try_new(batches, schema) } #[cfg(test)] diff --git a/src/io/geozero/table/builder/table.rs b/src/io/geozero/table/builder/table.rs index ce64e699..78b1e998 100644 --- a/src/io/geozero/table/builder/table.rs +++ b/src/io/geozero/table/builder/table.rs @@ -204,7 +204,7 @@ impl<G: GeometryArrayBuilder + GeomProcessor> GeoTableBuilder<G> { let batches = self.batches; let schema = batches[0].schema(); - let mut table = Table::try_new(schema, batches)?; + let mut table = Table::try_new(batches, schema)?; let geom_slices = self .geom_arrays diff --git a/src/io/ipc/reader.rs b/src/io/ipc/reader.rs index 11ed2e96..d05e236a 100644 --- a/src/io/ipc/reader.rs +++ b/src/io/ipc/reader.rs @@ -11,7 +11,7 @@ pub fn read_ipc<R: Read + Seek>(reader: R) -> Result<Table> { let reader = FileReader::try_new(reader, None)?; let schema = reader.schema(); let batches = reader.collect::<std::result::Result<Vec<_>, ArrowError>>()?; - Table::try_new(schema, batches) + Table::try_new(batches, schema) } /// Read into a Table from Arrow IPC 
record batch stream. @@ -19,5 +19,5 @@ pub fn read_ipc_stream<R: Read>(reader: R) -> Result<Table> { let reader = StreamReader::try_new(reader, None)?; let schema = reader.schema(); let batches = reader.collect::<std::result::Result<Vec<_>, ArrowError>>()?; - Table::try_new(schema, batches) + Table::try_new(batches, schema) } diff --git a/src/io/mod.rs b/src/io/mod.rs index b89b15e8..deecc31e 100644 --- a/src/io/mod.rs +++ b/src/io/mod.rs @@ -1,6 +1,8 @@ //! Reader and writer implementations of many common geospatial file formats, including //! interoperability with the `geozero` crate. +#![allow(missing_docs)] // FIXME + #[cfg(feature = "csv")] pub mod csv; #[cfg(feature = "geozero")] diff --git a/src/io/parquet/reader/async.rs b/src/io/parquet/reader/async.rs index 05e59ad7..170e1497 100644 --- a/src/io/parquet/reader/async.rs +++ b/src/io/parquet/reader/async.rs @@ -152,7 +152,7 @@ impl<T: AsyncFileReader + Unpin + Send + 'static> GeoParquetRecordBatchStream<T> pub async fn read_table(self) -> Result<Table> { let output_schema = self.output_schema.clone(); let batches = self.read_stream().try_collect::<_>().await?; - Table::try_new(output_schema, batches) + Table::try_new(batches, output_schema) } } diff --git a/src/io/parquet/reader/builder.rs b/src/io/parquet/reader/builder.rs index 5e6b33e5..5d02d453 100644 --- a/src/io/parquet/reader/builder.rs +++ b/src/io/parquet/reader/builder.rs @@ -127,7 +127,7 @@ impl GeoParquetRecordBatchReader { pub fn read_table(self) -> Result<Table> { let output_schema = self.output_schema.clone(); let batches = self.collect::<std::result::Result<Vec<_>, ArrowError>>()?; - Table::try_new(output_schema, batches) + Table::try_new(batches, output_schema) } } diff --git a/src/io/stream.rs b/src/io/stream.rs index 069daa1f..19534e91 100644 --- a/src/io/stream.rs +++ b/src/io/stream.rs @@ -27,7 +27,7 @@ impl RecordBatchReader { impl From<Table> for RecordBatchReader { fn from(value: Table) -> Self { - let (schema, batches) = 
value.into_inner(); + let (batches, schema) = value.into_inner(); Self(Some(Box::new(RecordBatchIterator::new( batches.into_iter().map(Ok), schema, diff --git a/src/lib.rs b/src/lib.rs index 7fcd9a61..7635930d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,62 @@ //! A Rust implementation of the [GeoArrow](https://github.com/geoarrow/geoarrow) specification, -//! plus algorithms implemented on and returning these GeoArrow arrays. +//! including algorithms implemented on and returning these GeoArrow arrays. +//! +//! # Reading and writing +//! +//! The [io] module has functions for reading and writing GeoArrow data from a variety of formats. +//! To use most format readers and writers, you must enable their corresponding feature. +//! For example, to convert between [geojson](https://geojson.org/) and GeoArrow, enable the `geozero` feature in your `Cargo.toml`: +//! +//! ```toml +//! [dependencies] +//! geoarrow = { version = "*", features = ["geozero"] } +//! ``` +//! +//! Then: +//! +//! ``` +//! # #[cfg(feature = "geozero")] +//! # { +//! use std::{io::Cursor, fs::File}; +//! +//! // Reads geojson from a file into a GeoArrow table. +//! let file = File::open("fixtures/roads.geojson").unwrap(); +//! let table = geoarrow::io::geojson::read_geojson(file, None).unwrap(); +//! +//! // Writes that table to a cursor as JSON, then reads it back into a `serde_json::Value`. +//! let mut cursor = Cursor::new(Vec::new()); +//! geoarrow::io::geojson::write_geojson(table, &mut cursor); +//! let value: serde_json::Value = serde_json::from_slice(&cursor.into_inner()).unwrap(); +//! # } +//! ``` +//! +//! See the [io] module for more information on the available formats and their features. +//! +//! # Constructing +//! +//! You can build GeoArrow arrays all at once from [mod@geo] structures, or anything that implements geometry traits, e.g. [PointTrait](crate::geo_traits::PointTrait). +//! 
Along with the GeoRust community, **geoarrow-rs** has been prototyping geometry access traits for a standardized way to access coordinate information, regardless of the storage format of the geometries. +//! For now, we vendor an implementation of geo-traits (see [mod@geo_traits]), but this may be upstreamed to georust in the future. +//! +//! ``` +//! use geoarrow::array::PointArray; +//! +//! let point = geo::point!(x: 1., y: 2.); +//! let array: PointArray<2> = vec![point].as_slice().into(); +//! ``` +//! +//! Or you can use builders, e.g. [PointBuilder](crate::array::PointBuilder): +//! +//! ``` +//! use geoarrow::array::PointBuilder; +//! let mut builder = PointBuilder::new(); +//! builder.push_point(Some(&geo::point!(x: 1., y: 2.))); +//! let array = builder.finish(); +//! ``` #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![cfg_attr(not(test), deny(unused_crate_dependencies))] +#![deny(missing_docs)] // FIXME some modules allow missing docs pub use trait_::GeometryArrayTrait; diff --git a/src/scalar/mod.rs b/src/scalar/mod.rs index 0cf05221..2adf9745 100644 --- a/src/scalar/mod.rs +++ b/src/scalar/mod.rs @@ -1,5 +1,7 @@ //! GeoArrow scalars, which are references onto a full GeoArrow array at a specific index. +#![allow(missing_docs)] // FIXME + pub use binary::{OwnedWKB, WKB}; pub use coord::{Coord, InterleavedCoord, SeparatedCoord}; pub use geometry::{Geometry, OwnedGeometry}; diff --git a/src/table/mod.rs b/src/table.rs similarity index 63% rename from src/table/mod.rs rename to src/table.rs index 18068eb2..108d07ff 100644 --- a/src/table/mod.rs +++ b/src/table.rs @@ -1,5 +1,6 @@ -//! Abstractions for Arrow tables. Useful for dataset IO where data will have geometries and -//! attributes. +//! Abstractions for Arrow tables. +//! +//! Useful for dataset IO where data will have geometries and attributes. 
use std::ops::Deref; use std::sync::Arc; @@ -41,7 +42,29 @@ pub struct Table { } impl Table { - pub fn try_new(schema: SchemaRef, batches: Vec<RecordBatch>) -> Result<Self> { + /// Creates a new table from a schema and a vector of record batches. + /// + /// # Errors + /// + /// Returns an error if a record batch's schema fields do not match the + /// top-level schema's fields. + /// + /// # Examples + /// + /// ``` + /// use arrow_array::RecordBatch; + /// use arrow_schema::{Schema, SchemaRef}; + /// use geoarrow::{GeometryArrayTrait, array::PointArray, table::Table}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let field = array.extension_field(); + /// let schema: SchemaRef = Schema::new(vec![field]).into(); + /// let columns = vec![array.into_array_ref()]; + /// let batch = RecordBatch::try_new(schema.clone(), columns).unwrap(); + /// let table = Table::try_new(vec![batch], schema).unwrap(); + /// ``` + pub fn try_new(batches: Vec<RecordBatch>, schema: SchemaRef) -> Result<Self> { for batch in batches.iter() { // Don't check schema metadata in comparisons. // TODO: I have some issues in the Parquet reader where the batches are missing the @@ -57,9 +80,44 @@ impl Table { } } - Ok(Self { schema, batches }) + Ok(Self { batches, schema }) } + /// Creates a new table from a schema, a vector of record batches, and a chunked geometry array. + /// + /// # Errors + /// + /// Returns an error if a record batch's schema fields do not match the + /// top-level schema's fields, or if the batches are empty. 
+ /// + /// # Examples + /// + /// ``` + /// use arrow_array::{Int32Array, RecordBatch}; + /// use arrow_schema::{DataType, Schema, SchemaRef, Field}; + /// use geoarrow::{ + /// GeometryArrayTrait, + /// array::PointArray, + /// table::Table, + /// chunked_array::ChunkedGeometryArray + /// }; + /// use std::sync::Arc; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let chunked_array = ChunkedGeometryArray::new(vec![array]); + /// + /// let id_array = Int32Array::from(vec![1]); + /// let schema_ref = Arc::new(Schema::new(vec![ + /// Field::new("id", DataType::Int32, false) + /// ])); + /// let batch = RecordBatch::try_new( + /// schema_ref.clone(), + /// vec![Arc::new(id_array)] + /// ).unwrap(); + /// + /// let table = Table::from_arrow_and_geometry(vec![batch], schema_ref, Arc::new(chunked_array)).unwrap(); + /// ``` pub fn from_arrow_and_geometry( batches: Vec<RecordBatch>, schema: SchemaRef, @@ -80,10 +138,27 @@ impl Table { new_batches.push(RecordBatch::try_new(new_schema.clone(), columns)?); } - Self::try_new(new_schema, new_batches) + Self::try_new(new_batches, new_schema) } - /// Cast the geometry at `index` to a different data type + /// Casts the geometry at `index` to a different data type + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// use geoarrow::{array::CoordType, datatypes::{GeoDataType, Dimension}}; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let mut table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let index = table.default_geometry_column_idx().unwrap(); + /// + /// // Change to separated storage of coordinates + /// table.cast_geometry(index, &GeoDataType::LineString(CoordType::Separated, Dimension::XY)).unwrap(); + /// # } + /// ``` pub fn cast_geometry(&mut self, index: usize, to_type: &GeoDataType) -> Result<()> { let orig_field = 
self.schema().field(index); @@ -102,9 +177,13 @@ impl Table { Ok(()) } - /// Parse the geometry at `index` to a GeoArrow-native type + /// Parse the WKB geometry at `index` to a GeoArrow-native type. + /// + /// Use [Self::cast_geometry] if you know the target data type. + /// + /// # Examples /// - /// Use [Self::cast_geometry] if you know the target data type + /// TODO pub fn parse_geometry_to_native( &mut self, index: usize, @@ -182,6 +261,7 @@ impl Table { // Note: This function is relatively complex because we want to parse any WKB columns to // geoarrow-native arrays #[deprecated] + #[allow(missing_docs)] pub fn from_arrow( batches: Vec<RecordBatch>, schema: SchemaRef, @@ -189,7 +269,7 @@ impl Table { target_geo_data_type: Option<GeoDataType>, ) -> Result<Self> { if batches.is_empty() { - return Self::try_new(schema, batches); + return Self::try_new(batches, schema); } let num_batches = batches.len(); @@ -289,29 +369,117 @@ impl Table { new_record_batches.push(RecordBatch::try_new(new_schema.clone(), new_batch).unwrap()); } - Table::try_new(new_schema, new_record_batches) + Table::try_new(new_record_batches, new_schema) } + /// Returns the length of this table. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// assert_eq!(table.len(), 21); + /// # } + /// ``` pub fn len(&self) -> usize { self.batches.iter().fold(0, |sum, val| sum + val.num_rows()) } + /// Returns true if this table is empty. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// assert!(!table.is_empty()); + /// # } + /// ``` pub fn is_empty(&self) -> bool { self.len() == 0 } - pub fn into_inner(self) -> (SchemaRef, Vec<RecordBatch>) { - (self.schema, self.batches) + /// Consumes this table, returning its record batches and its schema. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let (batches, schema) = table.into_inner(); + /// # } + /// ``` + pub fn into_inner(self) -> (Vec<RecordBatch>, SchemaRef) { + (self.batches, self.schema) } + /// Returns a reference to this table's schema. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let schema = table.schema(); + /// # } + /// ``` pub fn schema(&self) -> &SchemaRef { &self.schema } + /// Returns an immutable slice of this table's record batches. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let record_batches = table.batches(); + /// # } + /// ``` pub fn batches(&self) -> &[RecordBatch] { &self.batches } + /// Returns this table's default geometry index. + /// + /// # Errors + /// + /// Returns an error if there is not exactly one geometry column.
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// assert_eq!(table.default_geometry_column_idx().unwrap(), 6); + /// # } + /// ``` pub fn default_geometry_column_idx(&self) -> Result<usize> { let geom_col_indices = self.schema.as_ref().geometry_columns(); if geom_col_indices.len() != 1 { @@ -324,7 +492,23 @@ impl Table { } } - /// Access the geometry chunked array at the provided column index. + /// Returns a reference to the chunked geometry array at the given index. + /// + /// If index is `None` and there is only one geometry column, that array + /// will be returned. Otherwise, this method will return an error. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let chunked_array = table.geometry_column(None).unwrap(); // there's only one geometry column + /// # } + /// ``` pub fn geometry_column( &self, index: Option<usize>, @@ -351,10 +535,24 @@ impl Table { from_arrow_chunks(array_refs.as_slice(), field) } - /// Access all geometry chunked arrays from the table. + /// Returns a vector of references to all geometry chunked arrays. /// /// This may return an empty `Vec` if there are no geometry columns in the table, or may return /// more than one element if there are multiple geometry columns. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let chunked_arrays = table.geometry_columns().unwrap(); + /// assert_eq!(chunked_arrays.len(), 1); + /// # } + /// ``` pub fn geometry_columns(&self) -> Result<Vec<Arc<dyn ChunkedGeometryArrayTrait>>> { self.schema .as_ref() @@ -364,12 +562,43 @@ impl Table { .collect() } - /// The number of columns in this table. + /// Returns the number of columns in this table. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::fs::File; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// assert_eq!(table.num_columns(), 7); + /// # } + /// ``` pub fn num_columns(&self) -> usize { self.schema.fields().len() } - /// Replace the column at index `i` with the given field and arrays. + /// Replaces the column at index `i` with the given field and arrays. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::{sync::Arc, fs::File}; + /// use arrow_schema::{DataType, Field}; + /// use arrow_array::Int32Array; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let mut table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let indices: Vec<_> = (0..table.len()).map(|n| i32::try_from(n).unwrap()).collect(); + /// let array = Int32Array::from(indices); + /// let field = Field::new("id", DataType::Int32, false); + /// table.set_column(0, field.into(), vec![Arc::new(array)]).unwrap(); + /// # } + /// ``` pub fn set_column( &mut self, i: usize, @@ -416,6 +645,26 @@ impl Table { ChunkedArray::new(removed_chunks) } + /// Appends a column to this table, returning its new index. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "geozero")] + /// # { + /// use std::{sync::Arc, fs::File}; + /// use arrow_schema::{DataType, Field}; + /// use arrow_array::Int32Array; + /// + /// let file = File::open("fixtures/roads.geojson").unwrap(); + /// let mut table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap(); + /// let indices: Vec<_> = (0..table.len()).map(|n| i32::try_from(n).unwrap()).collect(); + /// let array = Int32Array::from(indices); + /// let field = Field::new("id", DataType::Int32, false); + /// let index = table.append_column(field.into(), vec![Arc::new(array)]).unwrap(); + /// assert_eq!(index, 7); + /// # } + /// ``` pub fn append_column(&mut self, field: FieldRef, column: Vec<Arc<dyn Array>>) -> Result<usize> { assert_eq!(self.batches().len(), column.len()); @@ -457,6 +706,6 @@ impl TryFrom<Box<dyn arrow_array::RecordBatchReader>> for Table { let batches = value .into_iter() .collect::<std::result::Result<Vec<_>, ArrowError>>()?; - Table::try_new(schema, batches) + Table::try_new(batches, schema) } } diff --git a/src/test/point.rs b/src/test/point.rs index f781987c..6bbb4e74 100644 --- 
a/src/test/point.rs +++ b/src/test/point.rs @@ -53,5 +53,5 @@ pub(crate) fn table() -> Table { ) .unwrap(); - Table::try_new(schema, vec![batch]).unwrap() + Table::try_new(vec![batch], schema).unwrap() } diff --git a/src/trait_.rs b/src/trait_.rs index a2fba6e5..34c7a0b8 100644 --- a/src/trait_.rs +++ b/src/trait_.rs @@ -1,4 +1,4 @@ -//! Defines [`GeometryArrayTrait`], which all geometry arrays implement. +//! Defines [`GeometryArrayTrait`], which all geometry arrays implement, and other traits. use crate::array::metadata::ArrayMetadata; use crate::array::{CoordBuffer, CoordType}; @@ -18,22 +18,12 @@ pub trait GeometryArrayTrait: std::fmt::Debug + Send + Sync { /// Returns the array as [`Any`] so that it can be /// downcasted to a specific implementation. /// - /// # Example: + /// # Examples /// /// ``` - /// //use geoarrow::datatypes::GeoDataType; - /// //use geoarrow::array::PointArray; - /// //use geoarrow::GeometryArrayTrait; - /// //use geo::point; - /// - /// //let point = point!(x: 1., y: 2.); - /// //let point_array: PointArray = vec![point].into(); - /// - /// //let geometry_array = Arc::new(point_array) as Arc<dyn GeometryArrayTrait>; - /// - /// # use std::sync::Arc; - /// # use arrow_array::{Int32Array, RecordBatch}; - /// # use arrow_schema::{Schema, Field, DataType, ArrowError}; + /// use std::sync::Arc; + /// use arrow_array::{Int32Array, RecordBatch}; + /// use arrow_schema::{Schema, Field, DataType, ArrowError}; /// /// let id = Int32Array::from(vec![1, 2, 3, 4, 5]); /// let batch = RecordBatch::try_new( @@ -51,52 +41,144 @@ pub trait GeometryArrayTrait: std::fmt::Debug + Send + Sync { /// Returns a the [`GeoDataType`] of this array. 
/// - /// # Example: + /// # Examples /// /// ``` - /// use geoarrow::datatypes::GeoDataType; - /// use geoarrow::array::PointArray; - /// use geoarrow::GeometryArrayTrait; - /// use geo::point; + /// use geoarrow::{array::PointArray, datatypes::GeoDataType, GeometryArrayTrait}; /// - /// let point = point!(x: 1., y: 2.); + /// let point = geo::point!(x: 1., y: 2.); /// let point_array: PointArray<2> = vec![point].as_slice().into(); - /// /// assert!(matches!(point_array.data_type(), GeoDataType::Point(_, _))); /// ``` fn data_type(&self) -> GeoDataType; - /// Get the logical DataType of this array. + /// Returns the physical [DataType] of this array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointArray, datatypes::GeoDataType, GeometryArrayTrait}; + /// use arrow_schema::DataType; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// assert!(matches!(point_array.storage_type(), DataType::FixedSizeList(_, _))); + /// ``` fn storage_type(&self) -> DataType; - /// Get the extension type of this array, as [defined by the GeoArrow + /// Returns the extension type of this array, as [defined by the GeoArrow /// specification](https://github.com/geoarrow/geoarrow/blob/main/extension-types.md). /// - /// Always returns `DataType::Extension`. + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointArray, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// let field = point_array.extension_field(); + /// assert_eq!(field.name(), "geometry"); + /// ``` fn extension_field(&self) -> Arc<Field>; - /// Get the extension name of this array. + /// Returns the extension name of this array. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointArray, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// assert_eq!(point_array.extension_name(), "geoarrow.point"); + /// ``` fn extension_name(&self) -> &str; - /// Convert this array into an arced [`arrow`] array. - /// # Implementation + /// Converts this array into an arced [`arrow`] array, consuming the original array. + /// /// This is `O(1)`. + /// + /// # Examples + /// + /// ``` + /// + /// use geoarrow::{array::PointArray, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// let array_ref = point_array.into_array_ref(); + /// ``` #[must_use] fn into_array_ref(self) -> ArrayRef; + /// Converts this array into an arced [`arrow`] array. + /// + /// This is `O(1)`. + /// + /// # Examples + /// + /// ``` + /// + /// use geoarrow::{array::PointArray, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// let array_ref = point_array.to_array_ref(); + /// ``` #[must_use] fn to_array_ref(&self) -> ArrayRef; - /// Get the coordinate type of this geometry array, either interleaved or separated. + /// Returns the [CoordType] of this geometry array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::{PointArray, CoordType}, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// assert_eq!(point_array.coord_type(), CoordType::Interleaved); + /// ``` fn coord_type(&self) -> CoordType; + /// Converts this array to the same type of array but with the provided [CoordType]. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::{PointArray, CoordType}, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// let point_array = point_array.to_coord_type(CoordType::Separated); + /// ``` #[must_use] fn to_coord_type(&self, coord_type: CoordType) -> Arc<dyn GeometryArrayTrait>; /// The number of geometries contained in this array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointArray, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// assert_eq!(point_array.len(), 1); + /// ``` fn len(&self) -> usize; /// Returns `true` if the array is empty. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointArray, GeometryArrayTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let point_array: PointArray<2> = vec![point].as_slice().into(); + /// assert!(!point_array.is_empty()); + /// ``` fn is_empty(&self) -> bool { self.len() == 0 } @@ -118,21 +200,69 @@ pub trait GeometryArrayTrait: std::fmt::Debug + Send + Sync { /// ``` fn nulls(&self) -> Option<&NullBuffer>; + /// Returns this array's metadata. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{GeometryArrayTrait, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let metadata = array.metadata(); + /// ``` fn metadata(&self) -> Arc<ArrayMetadata>; + /// Returns a geometry array reference that includes the provided metadata. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{GeometryArrayTrait, array::{PointArray, metadata::{ArrayMetadata, Edges}}}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let metadata = ArrayMetadata { + /// crs: None, + /// edges: Some(Edges::Spherical), + /// }; + /// let metadata = array.with_metadata(metadata.into()); + /// ``` #[must_use] fn with_metadata(&self, metadata: Arc<ArrayMetadata>) -> GeometryArrayRef; - /// The number of null slots in this array. - /// # Implementation + /// Returns the number of null slots in this array. + /// /// This is `O(1)` since the number of null elements is pre-computed. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{GeometryArrayTrait, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// assert_eq!(array.null_count(), 0); + /// ``` #[inline] fn null_count(&self) -> usize { self.nulls().map(|x| x.null_count()).unwrap_or(0) } /// Returns whether slot `i` is null. - /// # Panic + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{GeometryArrayTrait, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// assert!(!array.is_null(0)); + /// ``` + /// + /// # Panics + /// /// Panics iff `i >= self.len()`. #[inline] fn is_null(&self, i: usize) -> bool { @@ -140,13 +270,36 @@ pub trait GeometryArrayTrait: std::fmt::Debug + Send + Sync { } /// Returns whether slot `i` is valid. - /// # Panic + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{GeometryArrayTrait, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// assert!(array.is_valid(0)); + /// ``` + /// + /// # Panics + /// /// Panics iff `i >= self.len()`. 
#[inline] fn is_valid(&self, i: usize) -> bool { !self.is_null(i) } + /// Returns a reference to this array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{GeometryArrayTrait, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let array_ref = array.as_ref(); + /// ``` fn as_ref(&self) -> &dyn GeometryArrayTrait; // /// Clones this [`GeometryArray`] with a new new assigned bitmap. @@ -155,9 +308,10 @@ pub trait GeometryArrayTrait: std::fmt::Debug + Send + Sync { // fn with_validity(&self, validity: Option<NullBuffer>) -> Box<dyn GeometryArray>; } +/// Type alias for a dynamic reference to something that implements [GeometryArrayTrait]. pub type GeometryArrayRef = Arc<dyn GeometryArrayTrait>; -/// A generic trait for accessing the values of an [`Array`] +/// A trait for accessing the values of an [`Array`]. /// /// # Validity /// @@ -165,7 +319,7 @@ pub type GeometryArrayRef = Arc<dyn GeometryArrayTrait>; /// within the bounds `0..Array::len`, including for null indexes where [`Array::is_null`] is true. /// /// The value at null indexes is unspecified, and implementations must not rely on a specific -/// value such as [`Default::default`] being returned, however, it must not be undefined +/// value such as [`Default::default`] being returned, however, it must not be undefined. pub trait GeometryArrayAccessor<'a>: GeometryArrayTrait { /// The [geoarrow scalar object][crate::scalar] for this geometry array type. type Item: Send + Sync + GeometryScalarTrait; @@ -173,20 +327,58 @@ pub trait GeometryArrayAccessor<'a>: GeometryArrayTrait { /// The [`geo`] scalar object for this geometry array type. type ItemGeo: From<Self::Item>; - /// Returns the element at index `i` + /// Returns the element at index `i`, not considering validity. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray, geo_traits::PointTrait}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let value = array.value(0); // geoarrow::scalar::Point<2> + /// assert_eq!(value.x(), 1.); + /// assert_eq!(value.y(), 2.); + /// ``` + /// /// # Panics - /// Panics if the value is outside the bounds of the array + /// + /// Panics if the value is outside the bounds of the array. fn value(&'a self, index: usize) -> Self::Item { assert!(index <= self.len()); unsafe { self.value_unchecked(index) } } - /// Returns the element at index `i` + /// Returns the element at index `i`, not considering validity. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// unsafe { + /// let value = array.value_unchecked(0); // geoarrow::scalar::Point<2> + /// } + /// ``` + /// /// # Safety + /// /// Caller is responsible for ensuring that the index is within the bounds of the array unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item; - /// Access the value at slot `i` as an Arrow scalar, considering validity. + /// Returns the value at slot `i` as an Arrow scalar, considering validity. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// assert!(array.get(0).is_some()); + /// ``` fn get(&'a self, index: usize) -> Option<Self::Item> { if self.is_null(index) { return None; @@ -195,7 +387,19 @@ pub trait GeometryArrayAccessor<'a>: GeometryArrayTrait { Some(self.value(index)) } - /// Access the value at slot `i` as an Arrow scalar, considering validity. 
+ /// Returns the value at slot `i` as an Arrow scalar, considering validity. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// unsafe { + /// assert!(array.get_unchecked(0).is_some()); + /// } + /// ``` /// /// # Safety /// @@ -208,12 +412,34 @@ pub trait GeometryArrayAccessor<'a>: GeometryArrayTrait { Some(unsafe { self.value_unchecked(index) }) } - /// Access the value at slot `i` as a [`geo`] scalar, not considering validity. + /// Returns the value at slot `i` as a [`geo`] scalar, not considering validity. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let value = array.value_as_geo(0); // geo::Point + /// assert_eq!(value.x(), 1.); + /// assert_eq!(value.y(), 2.); + /// ``` fn value_as_geo(&'a self, i: usize) -> Self::ItemGeo { self.value(i).into() } - /// Access the value at slot `i` as a [`geo`] scalar, considering validity. + /// Returns the value at slot `i` as a [`geo`] scalar, considering validity. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// assert!(array.get_as_geo(0).is_some()); + /// ``` fn get_as_geo(&'a self, i: usize) -> Option<Self::ItemGeo> { if self.is_null(i) { return None; @@ -222,92 +448,316 @@ pub trait GeometryArrayAccessor<'a>: GeometryArrayTrait { Some(self.value_as_geo(i)) } + /// Iterates over this array's geoarrow scalar values, considering validity. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let maybe_points: Vec<Option<_>> = array.iter().collect(); + /// ``` fn iter(&'a self) -> impl ExactSizeIterator<Item = Option<Self::Item>> + 'a { (0..self.len()).map(|i| unsafe { self.get_unchecked(i) }) } - /// Iterator over geoarrow scalar values, not looking at validity + /// Iterator over geoarrow scalar values, not considering validity. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let points: Vec<_> = array.iter_values().collect(); + /// ``` fn iter_values(&'a self) -> impl ExactSizeIterator<Item = Self::Item> + 'a { (0..self.len()).map(|i| unsafe { self.value_unchecked(i) }) } - /// Iterator over geo scalar values, taking into account validity + /// Iterator over geo scalar values, considering validity + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let maybe_points: Vec<Option<_>> = array.iter_geo().collect(); + /// ``` fn iter_geo(&'a self) -> impl ExactSizeIterator<Item = Option<Self::ItemGeo>> + 'a { (0..self.len()).map(|i| unsafe { self.get_unchecked(i) }.map(|x| x.into())) } /// Iterator over geo scalar values, not looking at validity + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::GeometryArrayAccessor, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let points: Vec<_> = array.iter_geo_values().collect(); + /// ``` fn iter_geo_values(&'a self) -> 
impl ExactSizeIterator<Item = Self::ItemGeo> + 'a { (0..self.len()).map(|i| unsafe { self.value_unchecked(i) }.into()) } } -/// Horrible name, to be changed to a better name in the future!! +/// Trait for geometry array methods that return `Self`. +/// +/// TODO Horrible name, to be changed to a better name in the future!! pub trait GeometryArraySelfMethods<const D: usize> { - /// Create a new array with replaced coordinates + /// Creates a new array with replaced coordinates. /// /// This is useful if you want to apply an operation to _every_ coordinate in unison, such as a /// reprojection or a scaling operation, with no regards to each individual geometry + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// array::{PointArray, CoordBuffer, InterleavedCoordBuffer}, + /// trait_::{GeometryArraySelfMethods, GeometryArrayAccessor}, + /// }; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let coords = CoordBuffer::Interleaved(InterleavedCoordBuffer::new(vec![3., 4.].into())); + /// let array = array.with_coords(coords); + /// let value = array.value_as_geo(0); + /// assert_eq!(value.x(), 3.); + /// assert_eq!(value.y(), 4.); + /// ``` fn with_coords(self, coords: CoordBuffer<D>) -> Self; - /// Cast the coordinate buffer of this geometry array to the given coordinate type. + /// Casts the coordinate buffer of this geometry array to the given coordinate type.
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// array::{PointArray, CoordType, CoordBuffer}, + /// trait_::{GeometryArrayAccessor, GeometryArraySelfMethods}, + /// }; + /// + /// let point_0 = geo::point!(x: 1., y: 2.); + /// let point_1 = geo::point!(x: 3., y: 4.); + /// let array_interleaved: PointArray<2> = vec![point_0, point_1].as_slice().into(); + /// let array_separated = array_interleaved.into_coord_type(CoordType::Separated); + /// assert!(matches!(array_separated.coords(), &CoordBuffer::Separated(_))); + /// ``` fn into_coord_type(self, coord_type: CoordType) -> Self; /// Returns a zero-copy slice of this array with the indicated offset and length. /// - /// # Panic + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// array::PointArray, + /// trait_::{GeometryArraySelfMethods, GeometryArrayAccessor, GeometryArrayTrait} + /// }; + /// + /// let point_0 = geo::point!(x: 1., y: 2.); + /// let point_1 = geo::point!(x: 3., y: 4.); + /// let array: PointArray<2> = vec![point_0, point_1].as_slice().into(); + /// let smaller_array = array.slice(1, 1); + /// assert_eq!(smaller_array.len(), 1); + /// let value = smaller_array.value_as_geo(0); + /// assert_eq!(value.x(), 3.); + /// assert_eq!(value.y(), 4.); + /// ``` + /// + /// # Panics + /// /// This function panics iff `offset + length > self.len()`. #[must_use] fn slice(&self, offset: usize, length: usize) -> Self; - /// A slice that fully copies the contents of the underlying buffer + /// Returns an owned slice that fully copies the contents of the underlying buffer.
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointArray, trait_::GeometryArraySelfMethods}; + /// + /// let point_0 = geo::point!(x: 1., y: 2.); + /// let point_1 = geo::point!(x: 3., y: 4.); + /// let array: PointArray<2> = vec![point_0, point_1].as_slice().into(); + /// let smaller_array = array.owned_slice(1, 1); + /// ``` #[must_use] fn owned_slice(&self, offset: usize, length: usize) -> Self; } +/// Convert GeoArrow arrays into their underlying arrow arrays. pub trait IntoArrow { + /// The type of arrow array that this geoarrow array can be converted into. type ArrowArray; + /// Converts this geoarrow array into an arrow array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::IntoArrow, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let arrow_array = array.into_arrow(); + /// ``` fn into_arrow(self) -> Self::ArrowArray; } +/// A trait for converting geoarrow scalar types to their [mod@geo] equivalent. pub trait GeometryScalarTrait { /// The [`geo`] scalar object for this geometry array type. type ScalarGeo; + /// Converts this value to its [mod@geo] equivalent. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::{GeometryScalarTrait, GeometryArrayAccessor}, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let point = array.value(0).to_geo(); // array.value_as_geo(0) does the same thing + /// assert_eq!(point.x(), 1.); + /// assert_eq!(point.y(), 2.); + /// ``` fn to_geo(&self) -> Self::ScalarGeo; + /// Converts this value to a [geo::Geometry]. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::{GeometryScalarTrait, GeometryArrayAccessor}, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let geometry = array.value(0).to_geo_geometry(); + /// ``` fn to_geo_geometry(&self) -> geo::Geometry; + /// Converts this value to a [geos::Geometry]. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{trait_::{GeometryScalarTrait, GeometryArrayAccessor}, array::PointArray}; + /// + /// let point = geo::point!(x: 1., y: 2.); + /// let array: PointArray<2> = vec![point].as_slice().into(); + /// let geometry = array.value(0).to_geos().unwrap(); + /// ``` #[cfg(feature = "geos")] fn to_geos(&self) -> std::result::Result<geos::Geometry, geos::Error>; } /// A trait describing a mutable geometry array; i.e. an array whose values can be changed. +/// /// Mutable arrays cannot be cloned but can be mutated in place, /// thereby making them useful to perform numeric operations without allocations. /// As in [`GeometryArrayTrait`], concrete arrays (such as /// [`PointBuilder`][crate::array::PointBuilder]) implement how they are mutated. pub trait GeometryArrayBuilder: std::fmt::Debug + Send + Sync + Sized { - /// The length of the array. + /// Returns the length of the array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointBuilder, trait_::GeometryArrayBuilder}; + /// + /// let mut builder = PointBuilder::new(); + /// assert_eq!(builder.len(), 0); + /// builder.push_point(Some(&geo::point!(x: 1., y: 2.))); + /// assert_eq!(builder.len(), 1); + /// ``` fn len(&self) -> usize; - /// Whether the array is empty. + /// Returns whether the array is empty. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointBuilder, trait_::GeometryArrayBuilder}; + /// + /// let mut builder = PointBuilder::new(); + /// assert!(builder.is_empty()); + /// builder.push_point(Some(&geo::point!(x: 1., y: 2.))); + /// assert!(!builder.is_empty()); + /// ``` fn is_empty(&self) -> bool { self.len() == 0 } - /// The optional validity of the array. + /// Returns the validity buffer of this array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointBuilder, trait_::GeometryArrayBuilder}; + /// + /// let builder = PointBuilder::<2>::new(); + /// assert!(builder.nulls().is_empty()); + /// ``` fn nulls(&self) -> &NullBufferBuilder; + /// Creates a new builder. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointBuilder, trait_::GeometryArrayBuilder}; + /// let builder = PointBuilder::<2>::new(); + /// ``` fn new() -> Self; + /// Creates a new builder with capacity and other options. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// array::{PointBuilder, CoordType, metadata::{ArrayMetadata, Edges}}, + /// trait_::GeometryArrayBuilder, + /// }; + /// let metadata = ArrayMetadata { + /// crs: None, + /// edges: Some(Edges::Spherical), + /// }; + /// let builder = PointBuilder::<2>::with_geom_capacity_and_options( + /// 2, + /// CoordType::Interleaved, + /// metadata.into() + /// ); + /// ``` fn with_geom_capacity_and_options( geom_capacity: usize, coord_type: CoordType, metadata: Arc<ArrayMetadata>, ) -> Self; + /// Creates a new builder with the given capacity. 
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// array::PointBuilder, + /// trait_::GeometryArrayBuilder, + /// }; + /// let builder = PointBuilder::<2>::with_geom_capacity(2); + /// ``` fn with_geom_capacity(geom_capacity: usize) -> Self { GeometryArrayBuilder::with_geom_capacity_and_options( geom_capacity, @@ -316,13 +766,58 @@ pub trait GeometryArrayBuilder: std::fmt::Debug + Send + Sync + Sized { ) } + /// Sets this builder's metadata. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{ + /// array::{PointBuilder, metadata::{ArrayMetadata, Edges}}, + /// trait_::GeometryArrayBuilder, + /// }; + /// let mut builder = PointBuilder::<2>::new(); + /// let metadata = ArrayMetadata { + /// crs: None, + /// edges: Some(Edges::Spherical), + /// }; + /// builder.set_metadata(metadata.into()); + /// ``` fn set_metadata(&mut self, metadata: Arc<ArrayMetadata>); + /// Finishes building the underlying data structures and returns a geometry array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointBuilder, trait_::{GeometryArrayBuilder, GeometryArrayTrait}}; + /// + /// let mut builder = PointBuilder::new(); + /// builder.push_point(Some(&geo::point!(x: 1., y: 2.))); + /// let array = builder.finish(); + /// assert_eq!(array.len(), 1); + /// ``` fn finish(self) -> Arc<dyn GeometryArrayTrait>; - /// Get the coordinate type of this geometry array, either interleaved or separated. + /// Returns the [CoordType] of this geometry array. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::{PointBuilder, CoordType}, trait_::GeometryArrayBuilder}; + /// let builder = PointBuilder::<2>::new(); + /// assert_eq!(builder.coord_type(), CoordType::Interleaved); + /// ``` fn coord_type(&self) -> CoordType; + /// Returns the [ArrayMetadata] of this geometry array.
+ /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::{PointBuilder, CoordType}, trait_::GeometryArrayBuilder}; + /// let builder = PointBuilder::<2>::new(); + /// let metadata = builder.metadata(); + /// ``` fn metadata(&self) -> Arc<ArrayMetadata>; // /// Convert itself to an (immutable) [`GeometryArray`]. @@ -356,5 +851,14 @@ pub trait GeometryArrayBuilder: std::fmt::Debug + Send + Sync + Sized { // /// Shrink the array to fit its length. // fn shrink_to_fit(&mut self); + /// Converts this builder into an [`ArrayRef`], a dynamic array reference. + /// + /// # Examples + /// + /// ``` + /// use geoarrow::{array::PointBuilder, trait_::GeometryArrayBuilder}; + /// let builder = PointBuilder::<2>::new(); + /// let array_ref = builder.into_array_ref(); + /// ``` fn into_array_ref(self) -> Arc<dyn Array>; }