Skip to content

Commit

Permalink
fix(csharp/src/Apache.Arrow.Adbc): correct StandardSchemas.ColumnSche…
Browse files Browse the repository at this point in the history
…ma data types (apache#1731)

1. Corrects the two fields in `StandardSchemas.ColumnSchema`
2. Adds unit tests for the `StandardSchemas`
3. Adds extension methods to validate data using `Schema` or
`IReadOnlyList<Field>`.
4. Adds an extension method to `CreateNestedListArray`

Resolves apache#1729
  • Loading branch information
birschick-bq authored and cocoa-xu committed May 8, 2024
1 parent fa8c249 commit bec71a4
Show file tree
Hide file tree
Showing 7 changed files with 707 additions and 302 deletions.
157 changes: 157 additions & 0 deletions csharp/src/Apache.Arrow.Adbc/Extensions/ListArrayExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System;
using System.Collections.Generic;
using Apache.Arrow.Types;

namespace Apache.Arrow.Adbc.Extensions
{
internal static class ListArrayExtensions
{
    /// <summary>
    /// Combines a sequence of arrays into one <see cref="ListArray"/>. Each entry of
    /// <paramref name="list"/> becomes one element of the result, and the data of all
    /// non-null entries is concatenated, in order, into the values array.
    /// </summary>
    /// <param name="list">The arrays to combine. A <c>null</c> entry is recorded as a null element that contributes no values.</param>
    /// <param name="dataType">The <see cref="IArrowType"/> of the values contained in each element.</param>
    /// <returns>A <see cref="ListArray"/> with one element per entry of <paramref name="list"/>.</returns>
    public static ListArray BuildListArrayForType(this IReadOnlyList<IArrowArray?> list, IArrowType dataType)
    {
        var offsets = new ArrowBuffer.Builder<int>();
        var validity = new ArrowBuffer.BitmapBuilder();
        var chunks = new List<ArrayData>(list.Count);
        int totalValues = 0;
        int nulls = 0;

        foreach (IArrowArray? entry in list)
        {
            // Null or not, every element starts at the current end of the accumulated values.
            offsets.Append(totalValues);

            if (entry is null)
            {
                validity.Append(false);
                nulls++;
                continue;
            }

            validity.Append(true);
            chunks.Add(entry.Data);
            totalValues += entry.Length;
        }

        // One trailing offset closes the final element.
        offsets.Append(totalValues);

        // The validity bitmap is only materialized when at least one element is null.
        ArrowBuffer validityBuffer = nulls > 0
            ? validity.Build()
            : ArrowBuffer.Empty;

        ArrayData? values = ArrayDataConcatenator.Concatenate(chunks);
        if (values is null)
        {
            // Nothing was concatenated: synthesize zero-length data of the requested type instead.
            var emptyFactory = new EmptyArrayCreationVisitor();
            dataType.Accept(emptyFactory);
            values = emptyFactory.Result;
        }

        IArrowArray valuesArray = ArrowArrayFactory.BuildArray(values);

        return new ListArray(
            new ListType(dataType),
            list.Count,
            offsets.Build(),
            valuesArray,
            validityBuffer,
            nulls,
            0);
    }

    /// <summary>
    /// Type visitor that produces zero-length <see cref="ArrayData"/> for the visited type
    /// and exposes it through <see cref="Result"/>.
    /// </summary>
    private sealed class EmptyArrayCreationVisitor :
        IArrowTypeVisitor<BooleanType>,
        IArrowTypeVisitor<FixedWidthType>,
        IArrowTypeVisitor<BinaryType>,
        IArrowTypeVisitor<StringType>,
        IArrowTypeVisitor<ListType>,
        IArrowTypeVisitor<FixedSizeListType>,
        IArrowTypeVisitor<StructType>,
        IArrowTypeVisitor<MapType>
    {
        /// <summary>The data created by the most recent <c>Visit</c> call.</summary>
        public ArrayData? Result { get; private set; }

        public void Visit(BooleanType type) => Result = new BooleanArray.Builder().Build().Data;

        public void Visit(FixedWidthType type) =>
            Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty, ArrowBuffer.Empty });

        public void Visit(BinaryType type) => Result = new BinaryArray.Builder().Build().Data;

        public void Visit(StringType type) => Result = new StringArray.Builder().Build().Data;

        public void Visit(ListType type)
        {
            ArrayData? values = CreateEmptyData(type.ValueDataType);

            // Buffers: an empty first buffer followed by an offsets buffer holding a single 0.
            Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty, SingleZeroOffsetBuffer() }, new[] { values });
        }

        public void Visit(FixedSizeListType type)
        {
            ArrayData? values = CreateEmptyData(type.ValueDataType);

            Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty }, new[] { values });
        }

        public void Visit(StructType type)
        {
            IReadOnlyList<Field> fields = type.Fields;
            var children = new ArrayData?[fields.Count];

            // One empty child per struct field, in field order.
            for (int index = 0; index < children.Length; index++)
            {
                children[index] = CreateEmptyData(fields[index].DataType);
            }

            Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty }, children);
        }

        public void Visit(MapType type) => Result = new MapArray.Builder(type).Build().Data;

        /// <summary>Fallback for any type without a dedicated overload above.</summary>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public void Visit(IArrowType type) =>
            throw new NotImplementedException($"EmptyArrayCreationVisitor for {type.Name} is not supported yet.");

        /// <summary>
        /// Visits <paramref name="childType"/> with this visitor and returns what that visit
        /// stored in <see cref="Result"/>.
        /// </summary>
        private ArrayData? CreateEmptyData(IArrowType childType)
        {
            childType.Accept(this);
            return Result;
        }

        /// <summary>Builds an <see cref="ArrowBuffer"/> containing exactly one <see cref="int"/> with value 0.</summary>
        private static ArrowBuffer SingleZeroOffsetBuffer()
        {
            var offsets = new ArrowBuffer.Builder<int>();
            offsets.Append(0);
            return offsets.Build();
        }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System;
using System.Collections.Generic;
using System.Linq;
using Apache.Arrow.Types;

namespace Apache.Arrow.Adbc.Extensions
{
internal static class StandardSchemaExtensions
{
    /// <summary>
    /// Checks that the given column arrays match the fields of a schema, both in number
    /// of columns and in Arrow type id.
    /// </summary>
    /// <param name="schema">The schema whose fields describe the expected columns.</param>
    /// <param name="data">The column arrays to check.</param>
    /// <returns>The same <paramref name="data"/> instance that was passed in.</returns>
    /// <exception cref="ArgumentException">Thrown when the number of columns or a column's data type does not match the schema fields.</exception>
    public static IReadOnlyList<IArrowArray> Validate(this Schema schema, IReadOnlyList<IArrowArray> data)
    {
        return Validate(schema.FieldsList, data);
    }

    /// <summary>
    /// Checks that the given column arrays match a list of schema fields, both in number
    /// of columns and in Arrow type id. Only <see cref="ArrowTypeId"/> values are compared.
    /// Children of struct, list and union columns are checked recursively.
    /// </summary>
    /// <param name="schemaFields">The fields describing the expected columns.</param>
    /// <param name="data">The column arrays to check.</param>
    /// <returns>The same <paramref name="data"/> instance that was passed in.</returns>
    /// <exception cref="ArgumentException">Thrown when the number of columns or a column's data type does not match the schema fields.</exception>
    public static IReadOnlyList<IArrowArray> Validate(this IReadOnlyList<Field> schemaFields, IReadOnlyList<IArrowArray> data)
    {
        int expectedCount = schemaFields.Count;
        if (expectedCount != data.Count)
        {
            throw new ArgumentException($"Expected number of columns {expectedCount} not equal to actual length {data.Count}", nameof(data));
        }

        for (int index = 0; index < expectedCount; index++)
        {
            Field expected = schemaFields[index];
            IArrowType expectedType = expected.DataType;
            ArrayData actual = data[index].Data;

            if (expectedType.TypeId != actual.DataType.TypeId)
            {
                throw new ArgumentException($"Expecting data type {expectedType} but found {actual.DataType} on field with name {expected.Name}.", nameof(data));
            }

            switch (expectedType.TypeId)
            {
                case ArrowTypeId.Struct:
                    // Struct children are always validated, even when there are no fields.
                    ValidateChildren(((StructType)expectedType).Fields, actual);
                    break;

                case ArrowTypeId.List:
                    ListType listType = (ListType)expectedType;
                    if (listType.Fields.Count > 0)
                    {
                        ValidateChildren(listType.Fields, actual);
                    }
                    break;

                case ArrowTypeId.Union:
                    UnionType unionType = (UnionType)expectedType;
                    if (unionType.Fields.Count > 0)
                    {
                        ValidateChildren(unionType.Fields, actual);
                    }
                    break;
            }
        }

        return data;
    }

    /// <summary>
    /// Wraps each child of <paramref name="parent"/> in an array and validates the
    /// resulting columns against <paramref name="childFields"/>.
    /// </summary>
    private static void ValidateChildren(IReadOnlyList<Field> childFields, ArrayData parent)
    {
        List<ContainerArray> childColumns = parent.Children
            .Select(child => new ContainerArray(child))
            .ToList();

        Validate(childFields, childColumns);
    }

    /// <summary>
    /// Bare <see cref="Array"/> subclass used to hand child <see cref="ArrayData"/> back to
    /// <c>Validate</c> as an <see cref="IArrowArray"/>.
    /// </summary>
    private class ContainerArray : Array
    {
        public ContainerArray(ArrayData data)
            : base(data)
        {
        }
    }
}
}
20 changes: 20 additions & 0 deletions csharp/src/Apache.Arrow.Adbc/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Runtime.CompilerServices;

// Grant the assemblies named below access to this assembly's internal members.
// Each friend assembly is identified by its name plus full public key, and all
// three entries use the same key.
[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Drivers.Apache, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")]
[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Drivers.BigQuery, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")]
[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")]
5 changes: 2 additions & 3 deletions csharp/src/Apache.Arrow.Adbc/StandardSchemas.cs
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@ public static class StandardSchemas
new Field("xdbc_scope_catalog", StringType.Default, true),
new Field("xdbc_scope_schema", StringType.Default, true),
new Field("xdbc_scope_table", StringType.Default, true),
new Field("xdbc_is_autoincrement", StringType.Default, true),
new Field("xdbc_is_generatedcolumn", StringType.Default, true)
new Field("xdbc_is_autoincrement", BooleanType.Default, true),
new Field("xdbc_is_generatedcolumn", BooleanType.Default, true)
};

public static readonly IReadOnlyList<Field> TableSchema = new Field[] {
Expand Down Expand Up @@ -177,5 +177,4 @@ public static class StandardSchemas
metadata: null
);
}

}
Loading

0 comments on commit bec71a4

Please sign in to comment.