From 24a4a8cc3a499157ee6c7c0714620e2d6834614d Mon Sep 17 00:00:00 2001
From: Pete Gadomski <pete.gadomski@gmail.com>
Date: Tue, 13 Aug 2024 15:17:52 -0600
Subject: [PATCH] refactor!: consolidate on (batches, schema)

---
 src/algorithm/native/explode.rs       |  2 +-
 src/io/gdal/reader.rs                 |  2 +-
 src/io/geozero/table/builder/table.rs |  2 +-
 src/io/ipc/reader.rs                  |  4 ++--
 src/io/parquet/reader/async.rs        |  2 +-
 src/io/parquet/reader/builder.rs      |  2 +-
 src/io/stream.rs                      |  2 +-
 src/table.rs                          | 22 +++++++++++-----------
 src/test/point.rs                     |  2 +-
 9 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/algorithm/native/explode.rs b/src/algorithm/native/explode.rs
index c7a03a9c..1b43fa86 100644
--- a/src/algorithm/native/explode.rs
+++ b/src/algorithm/native/explode.rs
@@ -286,7 +286,7 @@ impl ExplodeTable for Table {
             schema_builder.push(field.clone());
             let schema = schema_builder.finish();
 
-            Table::try_new(schema.into(), new_batches)
+            Table::try_new(new_batches, schema.into())
         } else {
             // No take is necessary; nothing happens
             Ok(self.clone())
diff --git a/src/io/gdal/reader.rs b/src/io/gdal/reader.rs
index 1c14234c..fb6df890 100644
--- a/src/io/gdal/reader.rs
+++ b/src/io/gdal/reader.rs
@@ -40,7 +40,7 @@ pub fn read_gdal(layer: &mut Layer, batch_size: Option<usize>) -> Result<Table>
         .into_iter()
         .collect::<std::result::Result<Vec<RecordBatch>, ArrowError>>()?;
 
-    Table::try_new(schema, batches)
+    Table::try_new(batches, schema)
 }
 
 #[cfg(test)]
diff --git a/src/io/geozero/table/builder/table.rs b/src/io/geozero/table/builder/table.rs
index ce64e699..78b1e998 100644
--- a/src/io/geozero/table/builder/table.rs
+++ b/src/io/geozero/table/builder/table.rs
@@ -204,7 +204,7 @@ impl<G: GeometryArrayBuilder + GeomProcessor> GeoTableBuilder<G> {
 
         let batches = self.batches;
         let schema = batches[0].schema();
-        let mut table = Table::try_new(schema, batches)?;
+        let mut table = Table::try_new(batches, schema)?;
 
         let geom_slices = self
             .geom_arrays
diff --git a/src/io/ipc/reader.rs b/src/io/ipc/reader.rs
index 11ed2e96..d05e236a 100644
--- a/src/io/ipc/reader.rs
+++ b/src/io/ipc/reader.rs
@@ -11,7 +11,7 @@ pub fn read_ipc<R: Read + Seek>(reader: R) -> Result<Table> {
     let reader = FileReader::try_new(reader, None)?;
     let schema = reader.schema();
     let batches = reader.collect::<std::result::Result<Vec<_>, ArrowError>>()?;
-    Table::try_new(schema, batches)
+    Table::try_new(batches, schema)
 }
 
 /// Read into a Table from Arrow IPC record batch stream.
@@ -19,5 +19,5 @@ pub fn read_ipc_stream<R: Read>(reader: R) -> Result<Table> {
     let reader = StreamReader::try_new(reader, None)?;
     let schema = reader.schema();
     let batches = reader.collect::<std::result::Result<Vec<_>, ArrowError>>()?;
-    Table::try_new(schema, batches)
+    Table::try_new(batches, schema)
 }
diff --git a/src/io/parquet/reader/async.rs b/src/io/parquet/reader/async.rs
index 05e59ad7..170e1497 100644
--- a/src/io/parquet/reader/async.rs
+++ b/src/io/parquet/reader/async.rs
@@ -152,7 +152,7 @@ impl<T: AsyncFileReader + Unpin + Send + 'static> GeoParquetRecordBatchStream<T>
 
     pub async fn read_table(self) -> Result<Table> {
         let output_schema = self.output_schema.clone();
         let batches = self.read_stream().try_collect::<_>().await?;
-        Table::try_new(output_schema, batches)
+        Table::try_new(batches, output_schema)
     }
 }
diff --git a/src/io/parquet/reader/builder.rs b/src/io/parquet/reader/builder.rs
index 5e6b33e5..5d02d453 100644
--- a/src/io/parquet/reader/builder.rs
+++ b/src/io/parquet/reader/builder.rs
@@ -127,7 +127,7 @@ impl GeoParquetRecordBatchReader {
 
     pub fn read_table(self) -> Result<Table> {
         let output_schema = self.output_schema.clone();
         let batches = self.collect::<std::result::Result<Vec<_>, ArrowError>>()?;
-        Table::try_new(output_schema, batches)
+        Table::try_new(batches, output_schema)
     }
 }
diff --git a/src/io/stream.rs b/src/io/stream.rs
index 069daa1f..19534e91 100644
--- a/src/io/stream.rs
+++ b/src/io/stream.rs
@@ -27,7 +27,7 @@ impl RecordBatchReader {
 
 impl From<Table> for RecordBatchReader {
     fn from(value: Table) -> Self {
-        let (schema, batches) = value.into_inner();
+        let (batches, schema) = value.into_inner();
         Self(Some(Box::new(RecordBatchIterator::new(
             batches.into_iter().map(Ok),
             schema,
diff --git a/src/table.rs b/src/table.rs
index f10df235..dd0411f9 100644
--- a/src/table.rs
+++ b/src/table.rs
@@ -62,9 +62,9 @@ impl Table {
     /// let schema: SchemaRef = Schema::new(vec![field]).into();
     /// let columns = vec![array.into_array_ref()];
     /// let batch = RecordBatch::try_new(schema.clone(), columns).unwrap();
-    /// let table = Table::try_new(schema, vec![batch]).unwrap();
+    /// let table = Table::try_new(vec![batch], schema).unwrap();
     /// ```
-    pub fn try_new(schema: SchemaRef, batches: Vec<RecordBatch>) -> Result<Self> {
+    pub fn try_new(batches: Vec<RecordBatch>, schema: SchemaRef) -> Result<Self> {
         for batch in batches.iter() {
             // Don't check schema metadata in comparisons.
             // TODO: I have some issues in the Parquet reader where the batches are missing the
@@ -116,11 +116,11 @@ impl Table {
     ///     vec![Arc::new(id_array)]
     /// ).unwrap();
     ///
-    /// let table = Table::from_arrow_and_geometry(schema_ref, vec![batch], Arc::new(chunked_array)).unwrap();
+    /// let table = Table::from_arrow_and_geometry(vec![batch], schema_ref, Arc::new(chunked_array)).unwrap();
     /// ```
     pub fn from_arrow_and_geometry(
-        schema: SchemaRef,
         batches: Vec<RecordBatch>,
+        schema: SchemaRef,
         geometry: Arc<dyn ChunkedGeometryArrayTrait>,
     ) -> Result<Self> {
         if batches.is_empty() {
@@ -138,7 +138,7 @@ impl Table {
             new_batches.push(RecordBatch::try_new(new_schema.clone(), columns)?);
         }
 
-        Self::try_new(new_schema, new_batches)
+        Self::try_new(new_batches, new_schema)
     }
 
     /// Casts the geometry at `index` to a different data type
@@ -269,7 +269,7 @@ impl Table {
         target_geo_data_type: Option<GeoDataType>,
     ) -> Result<Self> {
         if batches.is_empty() {
-            return Self::try_new(schema, batches);
+            return Self::try_new(batches, schema);
         }
 
         let num_batches = batches.len();
@@ -369,7 +369,7 @@ impl Table {
             new_record_batches.push(RecordBatch::try_new(new_schema.clone(), new_batch).unwrap());
         }
 
-        Table::try_new(new_schema, new_record_batches)
+        Table::try_new(new_record_batches, new_schema)
     }
 
     /// Returns the length of this table.
@@ -419,11 +419,11 @@ impl Table {
     ///
     /// let file = File::open("fixtures/roads.geojson").unwrap();
     /// let table = geoarrow::io::geojson::read_geojson(file, Default::default()).unwrap();
-    /// let (schema, record_batches) = table.into_inner();
+    /// let (batches, schema) = table.into_inner();
     /// # }
     /// ```
-    pub fn into_inner(self) -> (SchemaRef, Vec<RecordBatch>) {
-        (self.schema, self.batches)
+    pub fn into_inner(self) -> (Vec<RecordBatch>, SchemaRef) {
+        (self.batches, self.schema)
     }
 
     /// Returns a reference to this table's schema.
@@ -706,6 +706,6 @@ impl TryFrom<Box<dyn arrow_array::RecordBatchReader>> for Table {
         let batches = value
             .into_iter()
             .collect::<std::result::Result<Vec<_>, ArrowError>>()?;
-        Table::try_new(schema, batches)
+        Table::try_new(batches, schema)
     }
 }
diff --git a/src/test/point.rs b/src/test/point.rs
index f781987c..6bbb4e74 100644
--- a/src/test/point.rs
+++ b/src/test/point.rs
@@ -53,5 +53,5 @@ pub(crate) fn table() -> Table {
     )
     .unwrap();
 
-    Table::try_new(schema, vec![batch]).unwrap()
+    Table::try_new(vec![batch], schema).unwrap()
 }
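---

Note for reviewers (not part of the diff): a minimal sketch of the new calling
convention after this breaking change. It assumes the crate's `Table` type is
reachable at `geoarrow::table::Table` and uses the `arrow-array`/`arrow-schema`
crates; the one-column "id" batch is purely illustrative, standing in for the
elided `field`/`array` setup in the doctests above.

    use std::sync::Arc;

    use arrow_array::{Int32Array, RecordBatch};
    use arrow_schema::{DataType, Field, Schema, SchemaRef};
    use geoarrow::table::Table;

    // Build a one-column schema and a matching record batch.
    let schema: SchemaRef =
        Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
    )
    .unwrap();

    // Batches now come first, then the schema.
    // Previously: Table::try_new(schema, vec![batch])
    let table = Table::try_new(vec![batch], schema).unwrap();

    // into_inner() hands the parts back in the same (batches, schema) order.
    let (batches, schema) = table.into_inner();
    assert_eq!(batches.len(), 1);
    assert_eq!(schema.fields().len(), 1);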