forked from apache/arrow-adbc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix(csharp/src/Apache.Arrow.Adbc): correct StandardSchemas.ColumnSchema data types (apache#1731)

1. Corrects the two fields in `StandardSchemas.ColumnSchema`.
2. Adds unit tests for the `StandardSchemas`.
3. Adds extension methods to validate data using `Schema` or `IReadOnlyList<Field>`.
4. Adds extension method to `CreateNestedListArray`.

Resolves apache#1729
- Loading branch information
1 parent
fa8c249
commit bec71a4
Showing
7 changed files
with
707 additions
and
302 deletions.
There are no files selected for viewing
157 changes: 157 additions & 0 deletions
157
csharp/src/Apache.Arrow.Adbc/Extensions/ListArrayExtensions.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using Apache.Arrow.Types; | ||
|
||
namespace Apache.Arrow.Adbc.Extensions | ||
{ | ||
internal static class ListArrayExtensions
{
    /// <summary>
    /// Combines a sequence of arrays into a single <see cref="ListArray"/> whose elements have the given
    /// <see cref="IArrowType"/>. Every entry of <paramref name="list"/> becomes one list slot, in order;
    /// a null entry becomes a null slot that covers no values.
    /// </summary>
    /// <param name="list">The arrays to combine. Null entries are recorded as null list slots.</param>
    /// <param name="dataType">The data type of the values held by the resulting list.</param>
    /// <returns>A <see cref="ListArray"/> with one slot per entry of <paramref name="list"/>.</returns>
    public static ListArray BuildListArrayForType(this IReadOnlyList<IArrowArray?> list, IArrowType dataType)
    {
        ArrowBuffer.Builder<int> offsets = new ArrowBuffer.Builder<int>();
        ArrowBuffer.BitmapBuilder validity = new ArrowBuffer.BitmapBuilder();
        List<ArrayData> chunks = new List<ArrayData>(list.Count);
        int totalValues = 0;
        int nulls = 0;

        foreach (IArrowArray? item in list)
        {
            // Each slot starts where the values gathered so far end, whether or not the slot is null.
            offsets.Append(totalValues);

            if (item == null)
            {
                validity.Append(false);
                nulls++;
                continue;
            }

            validity.Append(true);
            chunks.Add(item.Data);
            totalValues += item.Length;
        }

        // The validity bitmap is only materialized when at least one slot is null.
        ArrowBuffer validityBuffer;
        if (nulls > 0)
        {
            validityBuffer = validity.Build();
        }
        else
        {
            validityBuffer = ArrowBuffer.Empty;
        }

        ArrayData? valueData = ArrayDataConcatenator.Concatenate(chunks);
        if (valueData == null)
        {
            // Nothing could be concatenated: fall back to an empty values array of the requested type.
            EmptyArrayCreationVisitor emptyFactory = new EmptyArrayCreationVisitor();
            dataType.Accept(emptyFactory);
            valueData = emptyFactory.Result;
        }

        IArrowArray values = ArrowArrayFactory.BuildArray(valueData);

        // Closing offset: marks the end of the last slot.
        offsets.Append(totalValues);

        return new ListArray(
            new ListType(dataType),
            list.Count,
            offsets.Build(),
            values,
            validityBuffer,
            nulls,
            0);
    }

    /// <summary>
    /// Type visitor that builds a zero-length <see cref="ArrayData"/> for the visited type and
    /// publishes it through <see cref="Result"/>. Types without a dedicated overload end up in
    /// <see cref="Visit(IArrowType)"/>, which throws.
    /// </summary>
    private class EmptyArrayCreationVisitor :
        IArrowTypeVisitor<BooleanType>,
        IArrowTypeVisitor<FixedWidthType>,
        IArrowTypeVisitor<BinaryType>,
        IArrowTypeVisitor<StringType>,
        IArrowTypeVisitor<ListType>,
        IArrowTypeVisitor<FixedSizeListType>,
        IArrowTypeVisitor<StructType>,
        IArrowTypeVisitor<MapType>
    {
        /// <summary>The array data produced by the most recent visit.</summary>
        public ArrayData? Result { get; private set; }

        public void Visit(BooleanType type) => Result = new BooleanArray.Builder().Build().Data;

        public void Visit(FixedWidthType type)
        {
            ArrowBuffer[] buffers = new[] { ArrowBuffer.Empty, ArrowBuffer.Empty };
            Result = new ArrayData(type, 0, 0, 0, buffers);
        }

        public void Visit(BinaryType type) => Result = new BinaryArray.Builder().Build().Data;

        public void Visit(StringType type) => Result = new StringArray.Builder().Build().Data;

        public void Visit(ListType type)
        {
            ArrayData? values = CreateEmptyChild(type.ValueDataType);

            // The offsets buffer of an empty list still carries a single 0 entry.
            Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty, MakeInt0Buffer() }, new[] { values });
        }

        public void Visit(FixedSizeListType type)
        {
            ArrayData? values = CreateEmptyChild(type.ValueDataType);

            Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty }, new[] { values });
        }

        public void Visit(StructType type)
        {
            int fieldCount = type.Fields.Count;
            ArrayData?[] members = new ArrayData[fieldCount];
            for (int index = 0; index < fieldCount; index++)
            {
                members[index] = CreateEmptyChild(type.Fields[index].DataType);
            }

            Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty }, members);
        }

        public void Visit(MapType type) => Result = new MapArray.Builder(type).Build().Data;

        public void Visit(IArrowType type)
        {
            throw new NotImplementedException($"EmptyArrayCreationVisitor for {type.Name} is not supported yet.");
        }

        /// <summary>
        /// Visits <paramref name="childType"/> with this visitor and returns whatever that visit left in <see cref="Result"/>.
        /// </summary>
        private ArrayData? CreateEmptyChild(IArrowType childType)
        {
            childType.Accept(this);
            return Result;
        }

        /// <summary>Builds a buffer holding exactly one <see cref="int"/> with value 0.</summary>
        private static ArrowBuffer MakeInt0Buffer()
        {
            ArrowBuffer.Builder<int> zeroOffset = new ArrowBuffer.Builder<int>();
            zeroOffset.Append(0);
            return zeroOffset.Build();
        }
    }
}
} |
93 changes: 93 additions & 0 deletions
93
csharp/src/Apache.Arrow.Adbc/Extensions/StandardSchemaExtensions.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using Apache.Arrow.Types; | ||
|
||
namespace Apache.Arrow.Adbc.Extensions | ||
{ | ||
internal static class StandardSchemaExtensions
{
    /// <summary>
    /// Checks that a set of column arrays matches a schema, both in the number of columns and in the type of each column.
    /// </summary>
    /// <param name="schema">The schema to validate against.</param>
    /// <param name="data">The column arrays to validate.</param>
    /// <returns>The same <paramref name="data"/> instance when validation succeeds.</returns>
    /// <exception cref="ArgumentException">The number of columns or a column's data type does not match the schema fields.</exception>
    public static IReadOnlyList<IArrowArray> Validate(this Schema schema, IReadOnlyList<IArrowArray> data)
        => Validate(schema.FieldsList, data);

    /// <summary>
    /// Checks that a set of column arrays matches a list of schema fields, both in the number of columns and in the
    /// type id of each column. Struct columns are always checked recursively against their child fields; list and
    /// union columns are checked recursively when the type declares at least one child field.
    /// </summary>
    /// <param name="schemaFields">The schema fields to validate against.</param>
    /// <param name="data">The column arrays to validate.</param>
    /// <returns>The same <paramref name="data"/> instance when validation succeeds.</returns>
    /// <exception cref="ArgumentException">The number of columns or a column's data type does not match the schema fields.</exception>
    public static IReadOnlyList<IArrowArray> Validate(this IReadOnlyList<Field> schemaFields, IReadOnlyList<IArrowArray> data)
    {
        int expectedCount = schemaFields.Count;
        if (expectedCount != data.Count)
        {
            throw new ArgumentException($"Expected number of columns {schemaFields.Count} not equal to actual length {data.Count}", nameof(data));
        }

        for (int i = 0; i < expectedCount; i++)
        {
            Field field = schemaFields[i];
            ArrayData columnData = data[i].Data;

            // Only the type id is compared here; nested children are handled by the recursion below.
            if (field.DataType.TypeId != columnData.DataType.TypeId)
            {
                throw new ArgumentException($"Expecting data type {field.DataType} but found {data[i].Data.DataType} on field with name {field.Name}.", nameof(data));
            }

            switch (field.DataType.TypeId)
            {
                case ArrowTypeId.Struct:
                {
                    StructType structType = (StructType)field.DataType;
                    ValidateChildren(structType.Fields, columnData);
                    break;
                }
                case ArrowTypeId.List:
                {
                    ListType listType = (ListType)field.DataType;
                    if (listType.Fields.Count > 0)
                    {
                        ValidateChildren(listType.Fields, columnData);
                    }
                    break;
                }
                case ArrowTypeId.Union:
                {
                    UnionType unionType = (UnionType)field.DataType;
                    if (unionType.Fields.Count > 0)
                    {
                        ValidateChildren(unionType.Fields, columnData);
                    }
                    break;
                }
            }
        }

        return data;
    }

    /// <summary>
    /// Wraps each child of <paramref name="parent"/> in a <see cref="ContainerArray"/> and validates the children against <paramref name="childFields"/>.
    /// </summary>
    private static void ValidateChildren(IReadOnlyList<Field> childFields, ArrayData parent)
    {
        Validate(childFields, parent.Children.Select(e => new ContainerArray(e)).ToList());
    }

    /// <summary>
    /// Bare array wrapper around an <see cref="ArrayData"/>, used to hand child data to the recursive validation.
    /// </summary>
    private class ContainerArray : Array
    {
        public ContainerArray(ArrayData data) : base(data)
        {
        }
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one or more | ||
// contributor license agreements. See the NOTICE file distributed with | ||
// this work for additional information regarding copyright ownership. | ||
// The ASF licenses this file to You under the Apache License, Version 2.0 | ||
// (the "License"); you may not use this file except in compliance with | ||
// the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
using System.Runtime.CompilerServices; | ||
|
||
// Friend assemblies: the Apache driver, the BigQuery driver and the test assembly are granted
// access to this assembly's internal members. All three entries carry the same public key.
[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Drivers.Apache, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")] | ||
[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Drivers.BigQuery, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")] | ||
[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.