fix(csharp/src/Drivers/BigQuery): improved support for ARRAY columns (#1356)

- Improves support for ARRAY column types (see the sketch below)
- Updates the XDBC type mappings
- Partially addresses #1353
- Fixes the constraint fields reported in the constraints schema
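
A minimal consumer-side sketch (not part of this change; names are illustrative) of how an ARRAY<INT64> column is expected to surface after this fix: a ListArray whose child values are an Int64Array.

using Apache.Arrow;

static class ArrayColumnExample
{
    // Reads the elements of row 0 from an ARRAY<INT64> column, assuming it is
    // the first column of a RecordBatch returned by the driver.
    public static long?[] ReadFirstRow(RecordBatch batch)
    {
        var listArray = (ListArray)batch.Column(0);   // the ARRAY column
        var values = (Int64Array)listArray.Values;    // flattened child values
        int start = listArray.ValueOffsets[0];        // element range for row 0
        int end = listArray.ValueOffsets[1];

        var row = new long?[end - start];
        for (int i = start; i < end; i++)
        {
            row[i - start] = values.GetValue(i);      // null-aware element access
        }
        return row;
    }
}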

---------

Co-authored-by: David Coe <[email protected]>
davidhcoe and David Coe authored Jan 16, 2024
1 parent 644f6e0 commit 046f8b6
Showing 10 changed files with 280 additions and 52 deletions.
2 changes: 1 addition & 1 deletion csharp/src/Apache.Arrow.Adbc/StandardSchemas.cs
@@ -94,7 +94,7 @@ public static class StandardSchemas
{
new Field("constraint_name", StringType.Default, false),
new Field("constraint_type", StringType.Default, false),
new Field("constraint_column_usage",
new Field("constraint_column_names",
new ListType(
new Field("item", StringType.Default, true)
),
67 changes: 66 additions & 1 deletion csharp/src/Client/SchemaConverter.cs
@@ -92,6 +92,19 @@ public static DataTable ConvertArrowSchema(Schema schema, AdbcStatement adbcStat
/// <param name="f"></param>
/// <returns></returns>
public static Type ConvertArrowType(Field f, DecimalBehavior decimalBehavior)
{
switch (f.DataType.TypeId)
{
case ArrowTypeId.List:
ListType list = f.DataType as ListType;
IArrowType valueType = list.ValueDataType;
return GetArrowArrayType(valueType);
default:
return GetArrowType(f, decimalBehavior);
}
}

public static Type GetArrowType(Field f, DecimalBehavior decimalBehavior)
{
switch (f.DataType.TypeId)
{
@@ -102,7 +115,7 @@ public static Type ConvertArrowType(Field f, DecimalBehavior decimalBehavior)
return typeof(bool);

case ArrowTypeId.Decimal128:
if(decimalBehavior == DecimalBehavior.UseSqlDecimal)
if (decimalBehavior == DecimalBehavior.UseSqlDecimal)
return typeof(SqlDecimal);
else
return typeof(decimal);
@@ -162,5 +175,57 @@ public static Type ConvertArrowType(Field f, DecimalBehavior decimalBehavior)
return f.DataType.GetType();
}
}

public static Type GetArrowArrayType(IArrowType dataType)
{
switch (dataType.TypeId)
{
case ArrowTypeId.Binary:
return typeof(BinaryArray);
case ArrowTypeId.Boolean:
return typeof(BooleanArray);
case ArrowTypeId.Decimal128:
return typeof(Decimal128Array);
case ArrowTypeId.Decimal256:
return typeof(Decimal256Array);
case ArrowTypeId.Time32:
return typeof(Time32Array);
case ArrowTypeId.Time64:
return typeof(Time64Array);
case ArrowTypeId.Date32:
return typeof(Date32Array);
case ArrowTypeId.Date64:
return typeof(Date64Array);
case ArrowTypeId.Double:
return typeof(DoubleArray);

#if NET5_0_OR_GREATER
case ArrowTypeId.HalfFloat:
return typeof(HalfFloatArray);
#endif
case ArrowTypeId.Float:
return typeof(FloatArray);
case ArrowTypeId.Int8:
return typeof(Int8Array);
case ArrowTypeId.Int16:
return typeof(Int16Array);
case ArrowTypeId.Int32:
return typeof(Int32Array);
case ArrowTypeId.Int64:
return typeof(Int64Array);
case ArrowTypeId.String:
return typeof(StringArray);
case ArrowTypeId.Struct:
return typeof(StructArray);
case ArrowTypeId.Timestamp:
return typeof(TimestampArray);
case ArrowTypeId.Null:
return typeof(NullArray);
case ArrowTypeId.List:
return typeof(ListArray);
}

throw new InvalidCastException($"Cannot determine the array type for {dataType.Name}");
}
}
}
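
A hedged usage sketch of the new list handling (the Apache.Arrow.Adbc.Client namespace for SchemaConverter and DecimalBehavior is assumed): a field describing a BigQuery ARRAY<INT64> column should now resolve to Int64Array instead of falling through to the default case.

using System;
using Apache.Arrow;
using Apache.Arrow.Types;
using Apache.Arrow.Adbc.Client; // assumed namespace of SchemaConverter and DecimalBehavior

static class SchemaConverterExample
{
    public static void Main()
    {
        // Field shape assumed for a BigQuery ARRAY<INT64> column in the Arrow schema.
        var field = new Field("int_array_col",
            new ListType(new Field("item", Int64Type.Default, true)),
            nullable: true);

        // With the change above, list fields resolve to the Arrow array type
        // of their value type rather than the generic fallback.
        Type netType = SchemaConverter.ConvertArrowType(field, DecimalBehavior.UseSqlDecimal);
        Console.WriteLine(netType); // expected: Apache.Arrow.Int64Array
    }
}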
80 changes: 67 additions & 13 deletions csharp/src/Drivers/BigQuery/BigQueryConnection.cs
@@ -18,6 +18,7 @@
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Diagnostics;
using System.Linq;
using System.Net.Http;
using System.Text;
@@ -458,11 +459,23 @@ private StructArray GetColumnSchema(
columnNameBuilder.Append(row["column_name"].ToString());
ordinalPositionBuilder.Append((int)(long)row["ordinal_position"]);
remarksBuilder.Append("");
xdbcDataTypeBuilder.AppendNull();

string dataType = ToTypeName(row["data_type"].ToString());

if (dataType.StartsWith("NUMERIC") || dataType.StartsWith("DECIMAL") || dataType.StartsWith("BIGNUMERIC") || dataType.StartsWith("BIGDECIMAL"))
{
ParsedDecimalValues values = ParsePrecisionAndScale(dataType);
xdbcColumnSizeBuilder.Append(values.Precision);
xdbcDecimalDigitsBuilder.Append(Convert.ToInt16(values.Scale));
}
else
{
xdbcColumnSizeBuilder.AppendNull();
xdbcDecimalDigitsBuilder.AppendNull();
}

xdbcDataTypeBuilder.AppendNull();
xdbcTypeNameBuilder.Append(dataType);
xdbcColumnSizeBuilder.AppendNull();
xdbcDecimalDigitsBuilder.AppendNull();
xdbcNumPrecRadixBuilder.AppendNull();
xdbcNullableBuilder.AppendNull();
xdbcColumnDefBuilder.AppendNull();
@@ -537,7 +550,7 @@ private StructArray GetConstraintSchema(
nullBitmapBuffer.Append(true);
length++;

if (depth == GetObjectsDepth.All)
if (depth == GetObjectsDepth.All || depth == GetObjectsDepth.Tables)
{
constraintColumnNamesValues.Add(GetConstraintColumnNames(
catalog, dbSchema, table, constraintName));
@@ -588,7 +601,8 @@ private StringArray GetConstraintColumnNames(

foreach (BigQueryRow row in result)
{
constraintColumnNamesBuilder.Append(row["column_name"].ToString());
string column = row["column_name"].ToString();
constraintColumnNamesBuilder.Append(column);
}

return constraintColumnNamesBuilder.Build();
@@ -607,17 +621,23 @@ private StructArray GetConstraintsUsage(
ArrowBuffer.BitmapBuilder nullBitmapBuffer = new ArrowBuffer.BitmapBuilder();
int length = 0;

string query = string.Format("SELECT * FROM `{0}`.`{1}`.INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE WHERE table_name = '{2}' AND constraint_name = '{3}'",
Sanitize(catalog), Sanitize(dbSchema), Sanitize(table), Sanitize(constraintName));
string query = string.Format("SELECT * FROM `{0}`.`{1}`.INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE WHERE constraint_name = '{2}'",
Sanitize(catalog), Sanitize(dbSchema), Sanitize(constraintName));

BigQueryResults result = this.client.ExecuteQuery(query, parameters: null);

foreach (BigQueryRow row in result)
{
constraintFkCatalogBuilder.Append(row["constraint_catalog"].ToString());
constraintFkDbSchemaBuilder.Append(row["constraint_schema"].ToString());
constraintFkTableBuilder.Append(row["table_name"].ToString());
constraintFkColumnNameBuilder.Append(row["column_name"].ToString());
string constraint_catalog = row["constraint_catalog"].ToString();
string constraint_schema = row["constraint_schema"].ToString();
string table_name = row["table_name"].ToString();
string column_name = row["column_name"].ToString();

constraintFkCatalogBuilder.Append(constraint_catalog);
constraintFkDbSchemaBuilder.Append(constraint_schema);
constraintFkTableBuilder.Append(table_name);
constraintFkColumnNameBuilder.Append(column_name);

nullBitmapBuffer.Append(true);
length++;
}
@@ -683,8 +703,42 @@ private XdbcDataType ToXdbcDataType(string type)
return XdbcDataType.XdbcDataType_XDBC_VARBINARY;
case "NUMERIC" or "DECIMAL" or "BIGNUMERIC" or "BIGDECIMAL":
return XdbcDataType.XdbcDataType_XDBC_NUMERIC;

default:

// in SqlDecimal, an OverflowException is thrown for decimals with scale > 28
// so the XDBC type needs to map to the SqlDecimal type
int decimalMaxScale = 28;

if (type.StartsWith("NUMERIC("))
{
ParsedDecimalValues parsedDecimalValues = ParsePrecisionAndScale(type);

if (parsedDecimalValues.Scale <= decimalMaxScale)
return XdbcDataType.XdbcDataType_XDBC_DECIMAL;
else
return XdbcDataType.XdbcDataType_XDBC_VARCHAR;
}

if (type.StartsWith("BIGNUMERIC("))
{
if (bool.Parse(this.properties[BigQueryParameters.LargeDecimalsAsString]))
{
return XdbcDataType.XdbcDataType_XDBC_VARCHAR;
}
else
{
ParsedDecimalValues parsedDecimalValues = ParsePrecisionAndScale(type);

if (parsedDecimalValues.Scale <= decimalMaxScale)
return XdbcDataType.XdbcDataType_XDBC_DECIMAL;
else
return XdbcDataType.XdbcDataType_XDBC_VARCHAR;
}
}

if (type.StartsWith("STRUCT"))
return XdbcDataType.XdbcDataType_XDBC_VARCHAR;

return XdbcDataType.XdbcDataType_XDBC_UNKNOWN_TYPE;
}
}
@@ -873,7 +927,7 @@ public override AdbcStatement CreateStatement()
throw new InvalidOperationException();
}

if(this.client == null)
if (this.client == null)
{
Open();
}
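
A small, self-contained sketch of the scale check introduced in ToXdbcDataType above; the string parsing stands in for the driver's private ParsePrecisionAndScale helper, and the LargeDecimalsAsString option handling is omitted.

using System;

static class XdbcDecimalMappingExample
{
    // Mirrors the rule above: scales that fit the limit noted in ToXdbcDataType
    // (<= 28) map to a decimal XDBC type, wider scales fall back to VARCHAR.
    public static string MapNumeric(string typeName) // e.g. "NUMERIC(38,9)"
    {
        int comma = typeName.IndexOf(',');
        int close = typeName.IndexOf(')');
        int scale = int.Parse(typeName.Substring(comma + 1, close - comma - 1));

        const int decimalMaxScale = 28;
        return scale <= decimalMaxScale ? "XDBC_DECIMAL" : "XDBC_VARCHAR";
    }

    public static void Main()
    {
        Console.WriteLine(MapNumeric("NUMERIC(38,9)"));     // XDBC_DECIMAL
        Console.WriteLine(MapNumeric("BIGNUMERIC(76,38)")); // XDBC_VARCHAR
    }
}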
34 changes: 21 additions & 13 deletions csharp/src/Drivers/BigQuery/BigQueryStatement.cs
@@ -107,46 +107,54 @@ private IArrowType TranslateType(TableFieldSchema field)
switch (field.Type)
{
case "INTEGER" or "INT64":
return Int64Type.Default;
return GetType(field, Int64Type.Default);
case "FLOAT" or "FLOAT64":
return DoubleType.Default;
return GetType(field, DoubleType.Default);
case "BOOL" or "BOOLEAN":
return BooleanType.Default;
return GetType(field, BooleanType.Default);
case "STRING":
return StringType.Default;
return GetType(field, StringType.Default);
case "BYTES":
return BinaryType.Default;
return GetType(field, BinaryType.Default);
case "DATETIME":
return TimestampType.Default;
return GetType(field, TimestampType.Default);
case "TIMESTAMP":
return TimestampType.Default;
return GetType(field, TimestampType.Default);
case "TIME":
return Time64Type.Default;
return GetType(field, Time64Type.Default);
case "DATE":
return Date64Type.Default;
return GetType(field, Date64Type.Default);
case "RECORD" or "STRUCT":
// it's a JSON string
return StringType.Default;
return GetType(field, StringType.Default);

// treat these values as strings
case "GEOGRAPHY" or "JSON":
return StringType.Default;
return GetType(field, StringType.Default);

// get schema cannot get precision and scale for NUMERIC or BIGNUMERIC types
// instead, the max values are returned from BigQuery
// see 'precision' on https://cloud.google.com/bigquery/docs/reference/rest/v2/tables
// and discussion in https://github.com/apache/arrow-adbc/pull/1192#discussion_r1365987279

case "NUMERIC" or "DECIMAL":
return new Decimal128Type(38, 9);
return GetType(field, new Decimal128Type(38, 9));

case "BIGNUMERIC" or "BIGDECIMAL":
return bool.Parse(this.Options[BigQueryParameters.LargeDecimalsAsString]) ? StringType.Default : new Decimal256Type(76, 38);
return bool.Parse(this.Options[BigQueryParameters.LargeDecimalsAsString]) ? GetType(field, StringType.Default) : GetType(field, new Decimal256Type(76, 38));

default: throw new InvalidOperationException($"{field.Type} cannot be translated");
}
}

private IArrowType GetType(TableFieldSchema field, IArrowType type)
{
if (field.Mode == "REPEATED")
return new ListType(type);

return type;
}

static IArrowReader ReadChunk(BigQueryReadClient readClient, string streamName)
{
// Ideally we wouldn't need to indirect through a stream, but the necessary APIs in Arrow
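
A hedged sketch of the REPEATED handling added above (the real GetType helper is private to BigQueryStatement); only a few BigQuery type names are shown and the fallback is abbreviated.

using Apache.Arrow.Types;

static class RepeatedFieldExample
{
    // REPEATED columns (BigQuery ARRAY<...>) are wrapped in an Arrow ListType;
    // everything else keeps its scalar Arrow type.
    public static IArrowType Translate(string bigQueryType, string mode)
    {
        IArrowType scalarType = bigQueryType switch
        {
            "INTEGER" or "INT64" => Int64Type.Default,
            "FLOAT" or "FLOAT64" => DoubleType.Default,
            "STRING" => StringType.Default,
            _ => StringType.Default, // abbreviated fallback for this sketch
        };

        return mode == "REPEATED" ? new ListType(scalarType) : scalarType;
    }
}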
20 changes: 1 addition & 19 deletions csharp/test/Apache.Arrow.Adbc.Tests/ClientTests.cs
@@ -207,25 +207,7 @@ static void AssertTypeAndValue(

if (netType != null)
{
if (netType.BaseType.Name.Contains("PrimitiveArray") && value != null)
{
int length = Convert.ToInt32(value.GetType().GetProperty("Length").GetValue(value));

if (length > 0)
{
object internalValue = value.GetType().GetMethod("GetValue").Invoke(value, new object[] { 0 });

Assert.True(internalValue.GetType() == ctv.ExpectedNetType, $"{name} is {netType.Name} and not {ctv.ExpectedNetType.Name} in the reader for query [{query}]");
}
else
{
Console.WriteLine($"Could not validate the values inside of {netType.Name} because it is empty for query [{query}]");
}
}
else
{
Assert.True(netType == ctv.ExpectedNetType, $"{name} is {netType.Name} and not {ctv.ExpectedNetType.Name} in the reader for query [{query}]");
}
Assert.True(netType == ctv.ExpectedNetType, $"{name} is {netType.Name} and not {ctv.ExpectedNetType.Name} in the reader for query [{query}]");
}

if (value != null)
35 changes: 35 additions & 0 deletions csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcConstraint.cs
@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System.Collections.Generic;

namespace Apache.Arrow.Adbc.Tests.Metadata
{
public class AdbcConstraint
{
public AdbcConstraint()
{
ColumnNames = new List<string>();
ColumnUsage = new List<AdbcUsageSchema>();
}

public string Name { get; set; }
public string Type { get; set; }
public List<string> ColumnNames { get; set; }
public List<AdbcUsageSchema> ColumnUsage { get; set; }
}
}
5 changes: 5 additions & 0 deletions csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcTable.cs
@@ -38,5 +38,10 @@ public class AdbcTable
/// List of columns associated with the table.
/// </summary>
public List<AdbcColumn> Columns { get; set; }

/// <summary>
/// The constraints associated with the table.
/// </summary>
public List<AdbcConstraint> Constraints { get; set; }
}
}
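
A hypothetical snippet showing how a test might consume the new Constraints property together with AdbcConstraint, assuming both classes live in the Apache.Arrow.Adbc.Tests.Metadata namespace.

using System;
using Apache.Arrow.Adbc.Tests.Metadata;

static class ConstraintMetadataExample
{
    public static void PrintConstraints(AdbcTable table)
    {
        foreach (AdbcConstraint constraint in table.Constraints)
        {
            // constraint_column_names from the standard schema populate ColumnNames.
            Console.WriteLine($"{constraint.Name} ({constraint.Type}): " +
                string.Join(", ", constraint.ColumnNames));
        }
    }
}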