diff --git a/csharp/src/Apache.Arrow.Adbc/Extensions/ListArrayExtensions.cs b/csharp/src/Apache.Arrow.Adbc/Extensions/ListArrayExtensions.cs
new file mode 100644
index 0000000000..749af550cf
--- /dev/null
+++ b/csharp/src/Apache.Arrow.Adbc/Extensions/ListArrayExtensions.cs
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow.Adbc.Extensions
+{
+    internal static class ListArrayExtensions
+    {
+        /// <summary>
+        /// Builds a <see cref="ListArray"/> from a list of <see cref="IArrowArray"/> data for the given datatype <see cref="IArrowType"/>.
+        /// It concatenates the contained data into a single ListArray.
+        /// </summary>
+        /// <remarks>
+        /// Each entry of <paramref name="list"/> becomes one element of the result, in order. A <c>null</c> entry
+        /// becomes a null element that contributes no values.
+        /// </remarks>
+        /// <param name="list">The list of data to build from.</param>
+        /// <param name="dataType">The data type of the contained data.</param>
+        /// <returns>A <see cref="ListArray"/> of the data.</returns>
+        /// <exception cref="ArgumentNullException">Thrown if <paramref name="list"/> is <c>null</c>.</exception>
+        public static ListArray BuildListArrayForType(this IReadOnlyList<IArrowArray?> list, IArrowType dataType)
+        {
+            if (list == null)
+            {
+                throw new ArgumentNullException(nameof(list));
+            }
+
+            ArrowBuffer.Builder<int> valueOffsetsBufferBuilder = new ArrowBuffer.Builder<int>();
+            ArrowBuffer.BitmapBuilder validityBufferBuilder = new ArrowBuffer.BitmapBuilder();
+            List<ArrayData> arrayDataList = new List<ArrayData>(list.Count);
+            int length = 0;
+            int nullCount = 0;
+
+            foreach (IArrowArray? array in list)
+            {
+                // Every element, null or not, starts at the current end of the concatenated values.
+                valueOffsetsBufferBuilder.Append(length);
+                validityBufferBuilder.Append(array != null);
+
+                if (array == null)
+                {
+                    nullCount++;
+                }
+                else
+                {
+                    arrayDataList.Add(array.Data);
+                    length += array.Length;
+                }
+            }
+
+            // The validity bitmap is only supplied when at least one element is null.
+            ArrowBuffer validityBuffer = nullCount > 0
+                ? validityBufferBuilder.Build() : ArrowBuffer.Empty;
+
+            ArrayData? data = ArrayDataConcatenator.Concatenate(arrayDataList);
+
+            if (data == null)
+            {
+                // Concatenation produced no data: fall back to an empty array of the requested type.
+                EmptyArrayCreationVisitor visitor = new EmptyArrayCreationVisitor();
+                dataType.Accept(visitor);
+                data = visitor.Result;
+            }
+
+            IArrowArray value = ArrowArrayFactory.BuildArray(data);
+
+            // The closing offset marks the end of the last element.
+            valueOffsetsBufferBuilder.Append(length);
+
+            return new ListArray(new ListType(dataType), list.Count,
+                valueOffsetsBufferBuilder.Build(), value,
+                validityBuffer, nullCount, 0);
+        }
+
+        /// <summary>
+        /// Type visitor that creates empty <see cref="ArrayData"/> for the visited type.
+        /// The most recently created data is exposed through <see cref="Result"/>.
+        /// </summary>
+        private class EmptyArrayCreationVisitor :
+            IArrowTypeVisitor<BooleanType>,
+            IArrowTypeVisitor<FixedWidthType>,
+            IArrowTypeVisitor<BinaryType>,
+            IArrowTypeVisitor<StringType>,
+            IArrowTypeVisitor<ListType>,
+            IArrowTypeVisitor<FixedSizeListType>,
+            IArrowTypeVisitor<StructType>,
+            IArrowTypeVisitor<MapType>
+        {
+            /// <summary>The data created by the most recent visit, or <c>null</c> before any visit.</summary>
+            public ArrayData? Result { get; private set; }
+
+            public void Visit(BooleanType type)
+            {
+                Result = new BooleanArray.Builder().Build().Data;
+            }
+
+            public void Visit(FixedWidthType type)
+            {
+                Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty, ArrowBuffer.Empty });
+            }
+
+            public void Visit(BinaryType type)
+            {
+                Result = new BinaryArray.Builder().Build().Data;
+            }
+
+            public void Visit(StringType type)
+            {
+                Result = new StringArray.Builder().Build().Data;
+            }
+
+            public void Visit(ListType type)
+            {
+                // Build the empty child first; the second buffer holds a single int zero (presumably the initial list offset).
+                type.ValueDataType.Accept(this);
+                ArrayData? child = Result;
+
+                Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty, MakeInt0Buffer() }, new[] { child });
+            }
+
+            public void Visit(FixedSizeListType type)
+            {
+                type.ValueDataType.Accept(this);
+                ArrayData? child = Result;
+
+                Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty }, new[] { child });
+            }
+
+            public void Visit(StructType type)
+            {
+                // Result is overwritten by every nested visit, so it is captured once per field.
+                ArrayData?[] children = new ArrayData[type.Fields.Count];
+                for (int i = 0; i < type.Fields.Count; i++)
+                {
+                    type.Fields[i].DataType.Accept(this);
+                    children[i] = Result;
+                }
+
+                Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty }, children);
+            }
+
+            public void Visit(MapType type)
+            {
+                Result = new MapArray.Builder(type).Build().Data;
+            }
+
+            // Fallback overload for types without a dedicated overload above; always throws.
+            public void Visit(IArrowType type)
+            {
+                throw new NotImplementedException($"EmptyArrayCreationVisitor for {type.Name} is not supported yet.");
+            }
+
+            /// <summary>Creates a buffer that holds a single <c>int</c> zero.</summary>
+            private static ArrowBuffer MakeInt0Buffer()
+            {
+                ArrowBuffer.Builder<int> builder = new ArrowBuffer.Builder<int>();
+                builder.Append(0);
+                return builder.Build();
+            }
+        }
+    }
+}
diff --git a/csharp/src/Apache.Arrow.Adbc/Extensions/StandardSchemaExtensions.cs b/csharp/src/Apache.Arrow.Adbc/Extensions/StandardSchemaExtensions.cs
new file mode 100644
index 0000000000..3e95aac483
--- /dev/null
+++ b/csharp/src/Apache.Arrow.Adbc/Extensions/StandardSchemaExtensions.cs
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow.Adbc.Extensions
+{
+    internal static class StandardSchemaExtensions
+    {
+        /// <summary>
+        /// Validates that the number of columns in a data array, and their data types, match a given schema.
+        /// </summary>
+        /// <param name="schema">The schema to validate against.</param>
+        /// <param name="data">The data array to validate.</param>
+        /// <returns>The same <paramref name="data"/> instance, so the call can be used inline.</returns>
+        /// <exception cref="ArgumentException">Thrown if the number of columns or the data types in the data array do not match the schema fields.</exception>
+        public static IReadOnlyList<IArrowArray> Validate(this Schema schema, IReadOnlyList<IArrowArray> data)
+        {
+            Validate(schema.FieldsList, data);
+            return data;
+        }
+
+        /// <summary>
+        /// Validates that the number of columns in a data array, and their data types, match the given schema fields.
+        /// </summary>
+        /// <param name="schemaFields">The schema fields to validate against.</param>
+        /// <param name="data">The data array to validate.</param>
+        /// <returns>The same <paramref name="data"/> instance, so the call can be used inline.</returns>
+        /// <exception cref="ArgumentException">Thrown if the number of columns or the data types in the data array do not match the schema fields.</exception>
+        public static IReadOnlyList<IArrowArray> Validate(this IReadOnlyList<Field> schemaFields, IReadOnlyList<IArrowArray> data)
+        {
+            if (schemaFields.Count != data.Count)
+            {
+                throw new ArgumentException($"Expected number of columns {schemaFields.Count} not equal to actual length {data.Count}", nameof(data));
+            }
+            for (int i = 0; i < schemaFields.Count; i++)
+            {
+                Field field = schemaFields[i];
+                ArrayData dataField = data[i].Data;
+                if (field.DataType.TypeId != dataField.DataType.TypeId)
+                {
+                    throw new ArgumentException($"Expecting data type {field.DataType} but found {dataField.DataType} on field with name {field.Name}.", nameof(data));
+                }
+                // Only the type id is compared above, so struct, list and union columns are also checked against their child fields.
+                switch (field.DataType.TypeId)
+                {
+                    case ArrowTypeId.Struct:
+                        ValidateChildren(((StructType)field.DataType).Fields, dataField, validateWhenEmpty: true);
+                        break;
+                    case ArrowTypeId.List:
+                        ValidateChildren(((ListType)field.DataType).Fields, dataField, validateWhenEmpty: false);
+                        break;
+                    case ArrowTypeId.Union:
+                        ValidateChildren(((UnionType)field.DataType).Fields, dataField, validateWhenEmpty: false);
+                        break;
+                }
+            }
+
+            return data;
+        }
+
+        /// <summary>Validates the children of <paramref name="parent"/> against <paramref name="childFields"/>; an empty field list is skipped unless <paramref name="validateWhenEmpty"/> is set.</summary>
+        private static void ValidateChildren(IReadOnlyList<Field> childFields, ArrayData parent, bool validateWhenEmpty)
+        {
+            if (validateWhenEmpty || childFields.Count > 0)
+            {
+                Validate(childFields, parent.Children.Select(e => new ContainerArray(e)).ToList());
+            }
+        }
+
+        private class ContainerArray : Array
+        {
+            public ContainerArray(ArrayData data) : base(data) { }
+        }
+    }
+}
diff --git a/csharp/src/Apache.Arrow.Adbc/Properties/AssemblyInfo.cs b/csharp/src/Apache.Arrow.Adbc/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000000..302a85450e
--- /dev/null
+++ b/csharp/src/Apache.Arrow.Adbc/Properties/AssemblyInfo.cs
@@ -0,0 +1,20 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +using System.Runtime.CompilerServices; + +[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Drivers.Apache, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")] +[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Drivers.BigQuery, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")] +[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")] diff --git a/csharp/src/Apache.Arrow.Adbc/StandardSchemas.cs b/csharp/src/Apache.Arrow.Adbc/StandardSchemas.cs index d683f6959d..5f2c3a8541 100644 --- a/csharp/src/Apache.Arrow.Adbc/StandardSchemas.cs +++ b/csharp/src/Apache.Arrow.Adbc/StandardSchemas.cs @@ -127,8 +127,8 @@ public static class StandardSchemas new Field("xdbc_scope_catalog", StringType.Default, true), new Field("xdbc_scope_schema", StringType.Default, true), new Field("xdbc_scope_table", StringType.Default, true), - new Field("xdbc_is_autoincrement", StringType.Default, true), - new Field("xdbc_is_generatedcolumn", StringType.Default, true) + new Field("xdbc_is_autoincrement", BooleanType.Default, true), + new Field("xdbc_is_generatedcolumn", BooleanType.Default, true) }; public static 
readonly IReadOnlyList TableSchema = new Field[] { @@ -177,5 +177,4 @@ public static class StandardSchemas metadata: null ); } - } diff --git a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs index 23b81c99fc..ba18d581df 100644 --- a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs +++ b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs @@ -24,6 +24,7 @@ using System.Threading; using System.Threading.Tasks; using Apache.Arrow.Adbc.Drivers.Apache.Hive2; +using Apache.Arrow.Adbc.Extensions; using Apache.Arrow.Ipc; using Apache.Arrow.Types; using Apache.Hive.Service.Rpc.Thrift; @@ -257,7 +258,7 @@ public override IArrowArrayStream GetInfo(IReadOnlyList codes) new Int64Array.Builder().Build(), new Int32Array.Builder().Build(), new ListArray.Builder(StringType.Default).Build(), - CreateNestedListArray(new IArrowArray?[] { entriesDataArray }, entryType) + new List(){ entriesDataArray }.BuildListArrayForType(entryType) }; DenseUnionArray infoValue = new DenseUnionArray(infoUnionType, arrayLength, childrenArrays, typeBuilder.Build(), offsetBuilder.Build(), nullCount); @@ -267,6 +268,7 @@ public override IArrowArrayStream GetInfo(IReadOnlyList codes) infoNameBuilder.Build(), infoValue }; + StandardSchemas.GetInfoSchema.Validate(dataArrays); return new SparkInfoArrowStream(StandardSchemas.GetInfoSchema, dataArrays); @@ -464,12 +466,15 @@ public override IArrowArrayStream GetObjects(GetObjectsDepth depth, string catal } } - IArrowArray[] dataArrays = new IArrowArray[] - { - catalogNameBuilder.Build(), - CreateNestedListArray(catalogDbSchemasValues, new StructType(StandardSchemas.DbSchemaSchema)), - }; - return new SparkInfoArrowStream(StandardSchemas.GetObjectsSchema, dataArrays); + Schema schema = StandardSchemas.GetObjectsSchema; + IReadOnlyList dataArrays = schema.Validate( + new List + { + catalogNameBuilder.Build(), + catalogDbSchemasValues.BuildListArrayForType(new StructType(StandardSchemas.DbSchemaSchema)), + 
}); + + return new SparkInfoArrowStream(schema, dataArrays); } private static IArrowType GetArrowType(ColumnTypeId columnTypeId, string typeName) @@ -542,14 +547,16 @@ private StructArray GetDbSchemas( } - IArrowArray[] dataArrays = new IArrowArray[] - { - dbSchemaNameBuilder.Build(), - CreateNestedListArray(dbSchemaTablesValues, new StructType(StandardSchemas.TableSchema)), - }; + IReadOnlyList schema = StandardSchemas.DbSchemaSchema; + IReadOnlyList dataArrays = schema.Validate( + new List + { + dbSchemaNameBuilder.Build(), + dbSchemaTablesValues.BuildListArrayForType(new StructType(StandardSchemas.TableSchema)), + }); return new StructArray( - new StructType(StandardSchemas.DbSchemaSchema), + new StructType(schema), length, dataArrays, nullBitmapBuffer.Build()); @@ -589,16 +596,18 @@ private StructArray GetTableSchemas( } - IArrowArray[] dataArrays = new IArrowArray[] - { - tableNameBuilder.Build(), - tableTypeBuilder.Build(), - CreateNestedListArray(tableColumnsValues, new StructType(StandardSchemas.ColumnSchema)), - CreateNestedListArray(tableConstraintsValues, new StructType(StandardSchemas.ConstraintSchema)) - }; + IReadOnlyList schema = StandardSchemas.TableSchema; + IReadOnlyList dataArrays = schema.Validate( + new List + { + tableNameBuilder.Build(), + tableTypeBuilder.Build(), + tableColumnsValues.BuildListArrayForType(new StructType(StandardSchemas.ColumnSchema)), + tableConstraintsValues.BuildListArrayForType( new StructType(StandardSchemas.ConstraintSchema)) + }); return new StructArray( - new StructType(StandardSchemas.TableSchema), + new StructType(schema), length, dataArrays, nullBitmapBuffer.Build()); @@ -660,160 +669,38 @@ private StructArray GetColumnSchema( length++; } - IArrowArray[] dataArrays = new IArrowArray[] - { - columnNameBuilder.Build(), - ordinalPositionBuilder.Build(), - remarksBuilder.Build(), - xdbcDataTypeBuilder.Build(), - xdbcTypeNameBuilder.Build(), - xdbcColumnSizeBuilder.Build(), - xdbcDecimalDigitsBuilder.Build(), - 
xdbcNumPrecRadixBuilder.Build(), - xdbcNullableBuilder.Build(), - xdbcColumnDefBuilder.Build(), - xdbcSqlDataTypeBuilder.Build(), - xdbcDatetimeSubBuilder.Build(), - xdbcCharOctetLengthBuilder.Build(), - xdbcIsNullableBuilder.Build(), - xdbcScopeCatalogBuilder.Build(), - xdbcScopeSchemaBuilder.Build(), - xdbcScopeTableBuilder.Build(), - xdbcIsAutoincrementBuilder.Build(), - xdbcIsGeneratedcolumnBuilder.Build() - }; + IReadOnlyList schema = StandardSchemas.ColumnSchema; + IReadOnlyList dataArrays = schema.Validate( + new List + { + columnNameBuilder.Build(), + ordinalPositionBuilder.Build(), + remarksBuilder.Build(), + xdbcDataTypeBuilder.Build(), + xdbcTypeNameBuilder.Build(), + xdbcColumnSizeBuilder.Build(), + xdbcDecimalDigitsBuilder.Build(), + xdbcNumPrecRadixBuilder.Build(), + xdbcNullableBuilder.Build(), + xdbcColumnDefBuilder.Build(), + xdbcSqlDataTypeBuilder.Build(), + xdbcDatetimeSubBuilder.Build(), + xdbcCharOctetLengthBuilder.Build(), + xdbcIsNullableBuilder.Build(), + xdbcScopeCatalogBuilder.Build(), + xdbcScopeSchemaBuilder.Build(), + xdbcScopeTableBuilder.Build(), + xdbcIsAutoincrementBuilder.Build(), + xdbcIsGeneratedcolumnBuilder.Build() + }); return new StructArray( - new StructType(StandardSchemas.ColumnSchema), + new StructType(schema), length, dataArrays, nullBitmapBuffer.Build()); } - private ListArray CreateNestedListArray(IReadOnlyList arrayList, IArrowType dataType) - { - ArrowBuffer.Builder valueOffsetsBufferBuilder = new ArrowBuffer.Builder(); - ArrowBuffer.BitmapBuilder validityBufferBuilder = new ArrowBuffer.BitmapBuilder(); - List arrayDataList = new List(arrayList.Count); - int length = 0; - int nullCount = 0; - - foreach (IArrowArray? 
array in arrayList) - { - if (array == null) - { - valueOffsetsBufferBuilder.Append(length); - validityBufferBuilder.Append(false); - nullCount++; - } - else - { - valueOffsetsBufferBuilder.Append(length); - validityBufferBuilder.Append(true); - arrayDataList.Add(array.Data); - length += array.Length; - } - } - - ArrowBuffer validityBuffer = nullCount > 0 - ? validityBufferBuilder.Build() : ArrowBuffer.Empty; - - ArrayData? data = ArrayDataConcatenator.Concatenate(arrayDataList); - - if (data == null) - { - EmptyArrayCreationVisitor visitor = new EmptyArrayCreationVisitor(); - dataType.Accept(visitor); - data = visitor.Result; - } - - IArrowArray value = ArrowArrayFactory.BuildArray(data); - - valueOffsetsBufferBuilder.Append(length); - - return new ListArray(new ListType(dataType), arrayList.Count, - valueOffsetsBufferBuilder.Build(), value, - validityBuffer, nullCount, 0); - } - - private class EmptyArrayCreationVisitor : - IArrowTypeVisitor, - IArrowTypeVisitor, - IArrowTypeVisitor, - IArrowTypeVisitor, - IArrowTypeVisitor, - IArrowTypeVisitor, - IArrowTypeVisitor, - IArrowTypeVisitor - { - public ArrayData? Result { get; private set; } - - public void Visit(BooleanType type) - { - Result = new BooleanArray.Builder().Build().Data; - } - - public void Visit(FixedWidthType type) - { - Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty, ArrowBuffer.Empty }); - } - - public void Visit(BinaryType type) - { - Result = new BinaryArray.Builder().Build().Data; - } - - public void Visit(StringType type) - { - Result = new StringArray.Builder().Build().Data; - } - - public void Visit(ListType type) - { - type.ValueDataType.Accept(this); - ArrayData? child = Result; - - Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty, MakeInt0Buffer() }, new[] { child }); - } - - public void Visit(FixedSizeListType type) - { - type.ValueDataType.Accept(this); - ArrayData? 
child = Result; - - Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty }, new[] { child }); - } - - public void Visit(StructType type) - { - ArrayData?[] children = new ArrayData[type.Fields.Count]; - for (int i = 0; i < type.Fields.Count; i++) - { - type.Fields[i].DataType.Accept(this); - children[i] = Result; - } - - Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty }, children); - } - - public void Visit(MapType type) - { - Result = new MapArray.Builder(type).Build().Data; - } - - public void Visit(IArrowType type) - { - throw new NotImplementedException($"EmptyArrayCreationVisitor for {type.Name} is not supported yet."); - } - - private static ArrowBuffer MakeInt0Buffer() - { - ArrowBuffer.Builder builder = new ArrowBuffer.Builder(); - builder.Append(0); - return builder.Build(); - } - } - private string PatternToRegEx(string pattern) { if (pattern == null) diff --git a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs index 809ebe74d8..19b629d208 100644 --- a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs +++ b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs @@ -23,6 +23,7 @@ using System.Text; using System.Text.Json; using System.Text.RegularExpressions; +using Apache.Arrow.Adbc.Extensions; using Apache.Arrow.Ipc; using Apache.Arrow.Types; using Google.Api.Gax; @@ -244,7 +245,7 @@ public override IArrowArrayStream GetInfo(IReadOnlyList codes) new Int64Array.Builder().Build(), new Int32Array.Builder().Build(), new ListArray.Builder(StringType.Default).Build(), - CreateNestedListArray(new IArrowArray?[] { entriesDataArray }, entryType) + new List(){ entriesDataArray }.BuildListArrayForType(entryType) }; DenseUnionArray infoValue = new DenseUnionArray(infoUnionType, arrayLength, childrenArrays, typeBuilder.Build(), offsetBuilder.Build(), nullCount); @@ -254,6 +255,7 @@ public override IArrowArrayStream GetInfo(IReadOnlyList codes) infoNameBuilder.Build(), infoValue }; + 
StandardSchemas.GetInfoSchema.Validate(dataArrays); return new BigQueryInfoArrowStream(StandardSchemas.GetInfoSchema, dataArrays); } @@ -327,8 +329,9 @@ private IArrowArray[] GetCatalogs( IArrowArray[] dataArrays = new IArrowArray[] { catalogNameBuilder.Build(), - CreateNestedListArray(catalogDbSchemasValues, new StructType(StandardSchemas.DbSchemaSchema)), + catalogDbSchemasValues.BuildListArrayForType(new StructType(StandardSchemas.DbSchemaSchema)), }; + StandardSchemas.GetObjectsSchema.Validate(dataArrays); return dataArrays; } @@ -377,8 +380,9 @@ private StructArray GetDbSchemas( IArrowArray[] dataArrays = new IArrowArray[] { dbSchemaNameBuilder.Build(), - CreateNestedListArray(dbSchemaTablesValues, new StructType(StandardSchemas.TableSchema)), + dbSchemaTablesValues.BuildListArrayForType(new StructType(StandardSchemas.TableSchema)), }; + StandardSchemas.DbSchemaSchema.Validate(dataArrays); return new StructArray( new StructType(StandardSchemas.DbSchemaSchema), @@ -466,9 +470,10 @@ private StructArray GetTableSchemas( { tableNameBuilder.Build(), tableTypeBuilder.Build(), - CreateNestedListArray(tableColumnsValues, new StructType(StandardSchemas.ColumnSchema)), - CreateNestedListArray(tableConstraintsValues, new StructType(StandardSchemas.ConstraintSchema)) + tableColumnsValues.BuildListArrayForType(new StructType(StandardSchemas.ColumnSchema)), + tableConstraintsValues.BuildListArrayForType(new StructType(StandardSchemas.ConstraintSchema)) }; + StandardSchemas.TableSchema.Validate(dataArrays); return new StructArray( new StructType(StandardSchemas.TableSchema), @@ -577,6 +582,7 @@ private StructArray GetColumnSchema( xdbcIsAutoincrementBuilder.Build(), xdbcIsGeneratedcolumnBuilder.Build() }; + StandardSchemas.ColumnSchema.Validate(dataArrays); return new StructArray( new StructType(StandardSchemas.ColumnSchema), @@ -641,9 +647,10 @@ private StructArray GetConstraintSchema( { constraintNameBuilder.Build(), constraintTypeBuilder.Build(), - 
CreateNestedListArray(constraintColumnNamesValues, StringType.Default), - CreateNestedListArray(constraintColumnUsageValues, new StructType(StandardSchemas.UsageSchema)) + constraintColumnNamesValues.BuildListArrayForType(StringType.Default), + constraintColumnUsageValues.BuildListArrayForType(new StructType(StandardSchemas.UsageSchema)) }; + StandardSchemas.ConstraintSchema.Validate(dataArrays); return new StructArray( new StructType(StandardSchemas.ConstraintSchema), @@ -721,6 +728,7 @@ private StructArray GetConstraintsUsage( constraintFkTableBuilder.Build(), constraintFkColumnNameBuilder.Build() }; + StandardSchemas.UsageSchema.Validate(dataArrays); return new StructArray( new StructType(StandardSchemas.UsageSchema), @@ -961,56 +969,11 @@ public override IArrowArrayStream GetTableTypes() { tableTypesBuilder.Build() }; + StandardSchemas.TableTypesSchema.Validate(dataArrays); return new BigQueryInfoArrowStream(StandardSchemas.TableTypesSchema, dataArrays); } - private ListArray CreateNestedListArray(IReadOnlyList arrayList, IArrowType dataType) - { - ArrowBuffer.Builder valueOffsetsBufferBuilder = new ArrowBuffer.Builder(); - ArrowBuffer.BitmapBuilder validityBufferBuilder = new ArrowBuffer.BitmapBuilder(); - List arrayDataList = new List(arrayList.Count); - int length = 0; - int nullCount = 0; - - foreach (IArrowArray? array in arrayList) - { - if (array == null) - { - valueOffsetsBufferBuilder.Append(length); - validityBufferBuilder.Append(false); - nullCount++; - } - else - { - valueOffsetsBufferBuilder.Append(length); - validityBufferBuilder.Append(true); - arrayDataList.Add(array.Data); - length += array.Length; - } - } - - ArrowBuffer validityBuffer = nullCount > 0 - ? validityBufferBuilder.Build() : ArrowBuffer.Empty; - - ArrayData? 
data = ArrayDataConcatenator.Concatenate(arrayDataList); - - if (data == null) - { - EmptyArrayCreationVisitor visitor = new EmptyArrayCreationVisitor(); - dataType.Accept(visitor); - data = visitor.Result; - } - - IArrowArray value = ArrowArrayFactory.BuildArray(data); - - valueOffsetsBufferBuilder.Append(length); - - return new ListArray(new ListType(dataType), arrayList.Count, - valueOffsetsBufferBuilder.Build(), value, - validityBuffer, nullCount, 0); - } - public override AdbcStatement CreateStatement() { if (this.credential == null) @@ -1136,83 +1099,5 @@ enum XdbcDataType XdbcDataType_XDBC_WCHAR = -8, XdbcDataType_XDBC_WVARCHAR = -9, } - - private class EmptyArrayCreationVisitor : - IArrowTypeVisitor, - IArrowTypeVisitor, - IArrowTypeVisitor, - IArrowTypeVisitor, - IArrowTypeVisitor, - IArrowTypeVisitor, - IArrowTypeVisitor, - IArrowTypeVisitor - { - public ArrayData? Result { get; private set; } - - public void Visit(BooleanType type) - { - Result = new BooleanArray.Builder().Build().Data; - } - - public void Visit(FixedWidthType type) - { - Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty, ArrowBuffer.Empty }); - } - - public void Visit(BinaryType type) - { - Result = new BinaryArray.Builder().Build().Data; - } - - public void Visit(StringType type) - { - Result = new StringArray.Builder().Build().Data; - } - - public void Visit(ListType type) - { - type.ValueDataType.Accept(this); - ArrayData? child = Result; - - Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty, MakeInt0Buffer() }, new[] { child }); - } - - public void Visit(FixedSizeListType type) - { - type.ValueDataType.Accept(this); - ArrayData? 
child = Result; - - Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty }, new[] { child }); - } - - public void Visit(StructType type) - { - ArrayData?[] children = new ArrayData[type.Fields.Count]; - for (int i = 0; i < type.Fields.Count; i++) - { - type.Fields[i].DataType.Accept(this); - children[i] = Result; - } - - Result = new ArrayData(type, 0, 0, 0, new[] { ArrowBuffer.Empty }, children); - } - - public void Visit(MapType type) - { - Result = new MapArray.Builder(type).Build().Data; - } - - public void Visit(IArrowType type) - { - throw new NotImplementedException($"EmptyArrayCreationVisitor for {type.Name} is not supported yet."); - } - - private static ArrowBuffer MakeInt0Buffer() - { - ArrowBuffer.Builder builder = new ArrowBuffer.Builder(); - builder.Append(0); - return builder.Build(); - } - } } } diff --git a/csharp/test/Apache.Arrow.Adbc.Tests/StandardSchemasTests.cs b/csharp/test/Apache.Arrow.Adbc.Tests/StandardSchemasTests.cs new file mode 100644 index 0000000000..0c40b80233 --- /dev/null +++ b/csharp/test/Apache.Arrow.Adbc.Tests/StandardSchemasTests.cs @@ -0,0 +1,364 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
*/

using System;
using System.Collections.Generic;
using System.Linq;
using Apache.Arrow.Adbc.Extensions;
using Apache.Arrow.Types;
using Xunit;

namespace Apache.Arrow.Adbc.Tests
{
    /// <summary>
    /// Validate StandardSchema.
    /// </summary>
    /// <remarks>
    /// Every fixture in this class is built from zero-length arrays: the tests exercise only the
    /// type/shape checks performed by the <c>Validate</c> extension, never actual row data.
    /// </remarks>
    public class StandardSchemasTests
    {
        // NOTE(review): the generic type arguments in this class (IArrowArray, Field,
        // ArgumentException, byte/int buffer builders) were reconstructed from usage.
        // Confirm that Validate throws ArgumentException and adjust if it throws another type.

        /// <summary>
        /// Validates the nested GetObjects hierarchy bottom-up (column, table, db schema, catalog),
        /// feeding the arrays returned by each <c>Validate</c> call into the next level.
        /// The test passes when no exception is thrown.
        /// </summary>
        [Fact]
        public void CanValidateSchema()
        {
            IReadOnlyList<IArrowArray> columnDataArrays = StandardSchemas.ColumnSchema.Validate(GetColumnDataArrays());
            IReadOnlyList<IArrowArray> tableDataArrays = StandardSchemas.TableSchema.Validate(GetTableDataArrays(columnDataArrays));
            IReadOnlyList<IArrowArray> schemaDataArrays = StandardSchemas.DbSchemaSchema.Validate(GetDbSchemaDataArrays(tableDataArrays));
            _ = StandardSchemas.GetObjectsSchema.Validate(GetGetObjectsDataArrays(schemaDataArrays));
        }

        /// <summary>
        /// A column set whose last array has the wrong type must be rejected by the column schema.
        /// </summary>
        [Fact]
        public void CanInvalidateColumnTypeSchema()
        {
            Assert.Throws<ArgumentException>(() => StandardSchemas.ColumnSchema.Validate(GetColumnDataArraysInvalidColumnType()));
        }

        /// <summary>
        /// A column set that is one array short must be rejected by the column schema.
        /// </summary>
        [Fact]
        public void CanInvalidateColumnCountSchema()
        {
            Assert.Throws<ArgumentException>(() => StandardSchemas.ColumnSchema.Validate(GetColumnDataArraysMissingColumn()));
        }

        /// <summary>
        /// An invalid column set nested one level deep must be rejected by the table schema.
        /// </summary>
        [Fact]
        public void CanInvalidateTableSchema()
        {
            IReadOnlyList<IArrowArray> columnDataArrays = GetColumnDataArraysInvalidColumnType();
            Assert.Throws<ArgumentException>(() => StandardSchemas.TableSchema.Validate(GetTableDataArrays(columnDataArrays)));
        }

        /// <summary>
        /// An invalid column set nested two levels deep must be rejected by the db-schema schema.
        /// </summary>
        [Fact]
        public void CanInvalidateDbSchema()
        {
            IReadOnlyList<IArrowArray> columnDataArrays = GetColumnDataArraysInvalidColumnType();
            IReadOnlyList<IArrowArray> tableDataArrays = GetTableDataArrays(columnDataArrays);
            Assert.Throws<ArgumentException>(() => StandardSchemas.DbSchemaSchema.Validate(GetDbSchemaDataArrays(tableDataArrays)));
        }

        /// <summary>
        /// An invalid column set nested three levels deep must be rejected by the GetObjects schema.
        /// </summary>
        [Fact]
        public void CanInvalidateGetObjectsSchema()
        {
            IReadOnlyList<IArrowArray> columnDataArrays = GetColumnDataArraysInvalidColumnType();
            IReadOnlyList<IArrowArray> tableDataArrays = GetTableDataArrays(columnDataArrays);
            IReadOnlyList<IArrowArray> schemaDataArrays = GetDbSchemaDataArrays(tableDataArrays);
            Assert.Throws<ArgumentException>(() => StandardSchemas.GetObjectsSchema.Validate(GetGetObjectsDataArrays(schemaDataArrays)));
        }

        /// <summary>
        /// A well-typed GetInfo result must validate without throwing.
        /// </summary>
        [Fact]
        public void CanValidateGetInfoSchema()
        {
            _ = StandardSchemas.GetInfoSchema.Validate(GetGetInfoDataArrays());
        }

        /// <summary>
        /// A GetInfo result whose <c>string_list</c> child holds Int32 items must be rejected,
        /// and the error message must name the offending nested field.
        /// </summary>
        [Fact]
        public void CanInvalidateGetInfoSchema()
        {
            Exception exception = Assert.Throws<ArgumentException>(() => StandardSchemas.GetInfoSchema.Validate(GetGetInfoDataArraysWithInvalidType()));
            Assert.Contains("Expecting data type Apache.Arrow.Types.StringType but found Apache.Arrow.Types.Int32Type on field with name item.", exception.Message);
        }

        /// <summary>
        /// Builds the two top-level GetObjects arrays: an empty string array followed by a list
        /// wrapping the given db-schema arrays.
        /// </summary>
        /// <param name="schemaDataArrays">Child arrays for the db-schema struct.</param>
        /// <returns>The arrays to validate against <c>StandardSchemas.GetObjectsSchema</c>.</returns>
        private static IReadOnlyList<IArrowArray> GetGetObjectsDataArrays(IReadOnlyList<IArrowArray> schemaDataArrays)
        {
            return new List<IArrowArray>
            {
                new StringArray.Builder().Build(),
                BuildSingleStructList(StandardSchemas.DbSchemaSchema, schemaDataArrays),
            };
        }

        /// <summary>
        /// Builds the two db-schema arrays: an empty string array followed by a list wrapping
        /// the given table arrays.
        /// </summary>
        /// <param name="tableDataArrays">Child arrays for the table struct.</param>
        /// <returns>The arrays to validate against <c>StandardSchemas.DbSchemaSchema</c>.</returns>
        private static IReadOnlyList<IArrowArray> GetDbSchemaDataArrays(IReadOnlyList<IArrowArray> tableDataArrays)
        {
            return new List<IArrowArray>
            {
                new StringArray.Builder().Build(),
                BuildSingleStructList(StandardSchemas.TableSchema, tableDataArrays),
            };
        }

        /// <summary>
        /// Builds the four table arrays: two empty string arrays, a list wrapping the given
        /// column arrays, and a constraints list consisting of a single null entry.
        /// </summary>
        /// <param name="columnDataArrays">Child arrays for the column struct.</param>
        /// <returns>The arrays to validate against <c>StandardSchemas.TableSchema</c>.</returns>
        private static IReadOnlyList<IArrowArray> GetTableDataArrays(IReadOnlyList<IArrowArray> columnDataArrays)
        {
            // A single null entry: the constraints list is present but carries no struct data.
            List<IArrowArray?> tableConstraintsValues = new List<IArrowArray?> { null };

            return new List<IArrowArray>
            {
                new StringArray.Builder().Build(),
                new StringArray.Builder().Build(),
                BuildSingleStructList(StandardSchemas.ColumnSchema, columnDataArrays),
                tableConstraintsValues.BuildListArrayForType(new StructType(StandardSchemas.ConstraintSchema)),
            };
        }

        /// <summary>
        /// Wraps <paramref name="children"/> in a zero-length struct of the given schema and
        /// returns a list array containing that struct as its only entry.
        /// </summary>
        /// <param name="schema">Fields describing the struct type.</param>
        /// <param name="children">Child arrays of the struct, one per field.</param>
        /// <returns>A list array whose value type is the struct type.</returns>
        private static ListArray BuildSingleStructList(IReadOnlyList<Field> schema, IReadOnlyList<IArrowArray> children)
        {
            StructType structType = new StructType(schema);
            List<IArrowArray?> values = new List<IArrowArray?>
            {
                new StructArray(structType, 0, children, new ArrowBuffer.BitmapBuilder().Build()),
            };

            return values.BuildListArrayForType(structType);
        }

        /// <summary>
        /// Returns the 19 empty arrays used as the well-typed column fixture, in schema order.
        /// A fresh list is created on every call so callers may mutate it.
        /// </summary>
        private static List<IArrowArray> GetColumnDataArrays() => new List<IArrowArray>
        {
            new StringArray.Builder().Build(),
            new Int32Array.Builder().Build(),
            new StringArray.Builder().Build(),
            new Int16Array.Builder().Build(),
            new StringArray.Builder().Build(),
            new Int32Array.Builder().Build(),
            new Int16Array.Builder().Build(),
            new Int16Array.Builder().Build(),
            new Int16Array.Builder().Build(),
            new StringArray.Builder().Build(),
            new Int16Array.Builder().Build(),
            new Int16Array.Builder().Build(),
            new Int32Array.Builder().Build(),
            new StringArray.Builder().Build(),
            new StringArray.Builder().Build(),
            new StringArray.Builder().Build(),
            new StringArray.Builder().Build(),
            new BooleanArray.Builder().Build(),
            new BooleanArray.Builder().Build(),
        };

        /// <summary>
        /// Returns the column fixture with its last array replaced by a string array.
        /// </summary>
        private static List<IArrowArray> GetColumnDataArraysInvalidColumnType()
        {
            List<IArrowArray> columns = GetColumnDataArrays();

            // invalid type, expects BooleanArray
            columns[columns.Count - 1] = new StringArray.Builder().Build();
            return columns;
        }

        /// <summary>
        /// Returns the column fixture with its last array removed (18 arrays instead of 19).
        /// </summary>
        private static List<IArrowArray> GetColumnDataArraysMissingColumn()
        {
            List<IArrowArray> columns = GetColumnDataArrays();

            // missing column
            columns.RemoveAt(columns.Count - 1);
            return columns;
        }

        /// <summary>
        /// Returns a well-typed, empty GetInfo result (info name array plus dense union value array).
        /// </summary>
        private static List<IArrowArray> GetGetInfoDataArrays()
            => BuildGetInfoDataArrays(StringType.Default);

        /// <summary>
        /// Returns an empty GetInfo result whose <c>string_list</c> child array is built with
        /// Int32 items.
        /// </summary>
        private static List<IArrowArray> GetGetInfoDataArraysWithInvalidType()
            => BuildGetInfoDataArrays(Int32Type.Default); // Should be StringType.Default

        /// <summary>
        /// Builds an empty GetInfo result. The union type is always declared with the expected
        /// field types; only the item type of the actual <c>string_list</c> child array varies.
        /// </summary>
        /// <param name="stringListItemType">
        /// Item type used to build the <c>string_list</c> child array.
        /// </param>
        /// <returns>The info-name array followed by the dense union info-value array.</returns>
        private static List<IArrowArray> BuildGetInfoDataArrays(IArrowType stringListItemType)
        {
            const int arrayLength = 0;
            const int nullCount = 0;

            StructType entryType = new StructType(
                new List<Field>
                {
                    new Field("key", Int32Type.Default, false),
                    new Field("value", Int32Type.Default, true),
                });

            UnionType infoUnionType = new UnionType(
                new List<Field>
                {
                    new Field("string_value", StringType.Default, true),
                    new Field("bool_value", BooleanType.Default, true),
                    new Field("int64_value", Int64Type.Default, true),
                    new Field("int32_bitmask", Int32Type.Default, true),
                    new Field(
                        "string_list",
                        new ListType(new Field("item", StringType.Default, true)),
                        false),
                    new Field(
                        "int32_to_int32_list_map",
                        new ListType(new Field("entries", entryType, false)),
                        true),
                },
                new[] { 0, 1, 2, 3, 4, 5 },
                UnionMode.Dense);

            StructArray entriesDataArray = new StructArray(
                entryType,
                0,
                new IArrowArray[] { new Int32Array.Builder().Build(), new Int32Array.Builder().Build() },
                new ArrowBuffer.BitmapBuilder().Build());

            // One child array per union field, in the same order as the type ids above.
            List<IArrowArray> childrenArrays = new List<IArrowArray>
            {
                new StringArray.Builder().Build(),
                new BooleanArray.Builder().Build(),
                new Int64Array.Builder().Build(),
                new Int32Array.Builder().Build(),
                new ListArray.Builder(stringListItemType).Build(),
                new List<IArrowArray?> { entriesDataArray }.BuildListArrayForType(entryType),
            };

            // Both union buffers stay empty because the union has zero rows.
            DenseUnionArray infoValue = new DenseUnionArray(
                infoUnionType,
                arrayLength,
                childrenArrays,
                new ArrowBuffer.Builder<byte>().Build(),
                new ArrowBuffer.Builder<int>().Build(),
                nullCount);

            return new List<IArrowArray>
            {
                new UInt32Array.Builder().Build(),
                infoValue,
            };
        }
    }
}