Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(csharp/src/Drivers/BigQuery): improved support for ARRAY columns #1356

Merged
merged 20 commits into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion csharp/src/Apache.Arrow.Adbc/StandardSchemas.cs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ public static class StandardSchemas
{
new Field("constraint_name", StringType.Default, false),
new Field("constraint_type", StringType.Default, false),
new Field("constraint_column_usage",
new Field("constraint_column_names",
new ListType(
new Field("item", StringType.Default, true)
),
Expand Down
67 changes: 66 additions & 1 deletion csharp/src/Client/SchemaConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,19 @@ public static DataTable ConvertArrowSchema(Schema schema, AdbcStatement adbcStat
/// <param name="f">The Arrow field whose data type is mapped to a .NET type.</param>
/// <param name="decimalBehavior">Passed through unchanged to <c>GetArrowType</c> for non-list fields.</param>
/// <returns>
/// For a list field, the Arrow array class that <c>GetArrowArrayType</c> reports for the
/// list's element type; for every other field, the result of <c>GetArrowType</c>.
/// </returns>
public static Type ConvertArrowType(Field f, DecimalBehavior decimalBehavior)
{
    // Anything that is not a list goes straight to the scalar mapping.
    if (f.DataType.TypeId != ArrowTypeId.List)
    {
        return GetArrowType(f, decimalBehavior);
    }

    // A list column is described by the array class of its elements.
    ListType listType = f.DataType as ListType;
    IArrowType elementType = listType.ValueDataType;
    return GetArrowArrayType(elementType);
}

public static Type GetArrowType(Field f, DecimalBehavior decimalBehavior)
{
switch (f.DataType.TypeId)
{
Expand All @@ -102,7 +115,7 @@ public static Type ConvertArrowType(Field f, DecimalBehavior decimalBehavior)
return typeof(bool);

case ArrowTypeId.Decimal128:
if(decimalBehavior == DecimalBehavior.UseSqlDecimal)
if (decimalBehavior == DecimalBehavior.UseSqlDecimal)
return typeof(SqlDecimal);
else
return typeof(decimal);
Expand Down Expand Up @@ -162,5 +175,57 @@ public static Type ConvertArrowType(Field f, DecimalBehavior decimalBehavior)
return f.DataType.GetType();
}
}

/// <summary>
/// Maps an Arrow data type to the Apache.Arrow array class that holds values of that type.
/// </summary>
/// <param name="dataType">The Arrow type to look up, for example the element type of a list column.</param>
/// <returns>The <see cref="Type"/> of the matching array class, such as <c>typeof(StringArray)</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="dataType"/> is null.</exception>
/// <exception cref="InvalidCastException">No array class is mapped for the given type.</exception>
public static Type GetArrowArrayType(IArrowType dataType)
{
    if (dataType == null)
    {
        throw new ArgumentNullException(nameof(dataType));
    }

    switch (dataType.TypeId)
    {
        case ArrowTypeId.Binary:
            return typeof(BinaryArray);
        case ArrowTypeId.Boolean:
            return typeof(BooleanArray);
        case ArrowTypeId.Decimal128:
            return typeof(Decimal128Array);
        case ArrowTypeId.Decimal256:
            return typeof(Decimal256Array);
        case ArrowTypeId.Time32:
            return typeof(Time32Array);
        case ArrowTypeId.Time64:
            return typeof(Time64Array);
        case ArrowTypeId.Date32:
            return typeof(Date32Array);
        case ArrowTypeId.Date64:
            return typeof(Date64Array);
        case ArrowTypeId.Double:
            return typeof(DoubleArray);

#if NET5_0_OR_GREATER
        // HalfFloatArray is only mapped when building for .NET 5 or later.
        case ArrowTypeId.HalfFloat:
            return typeof(HalfFloatArray);
#endif
        case ArrowTypeId.Float:
            return typeof(FloatArray);
        case ArrowTypeId.Int8:
            return typeof(Int8Array);
        case ArrowTypeId.Int16:
            return typeof(Int16Array);
        case ArrowTypeId.Int32:
            return typeof(Int32Array);
        case ArrowTypeId.Int64:
            return typeof(Int64Array);

        // Unsigned integers have dedicated array classes as well; without these
        // cases a list of unsigned values ends in the InvalidCastException below.
        case ArrowTypeId.UInt8:
            return typeof(UInt8Array);
        case ArrowTypeId.UInt16:
            return typeof(UInt16Array);
        case ArrowTypeId.UInt32:
            return typeof(UInt32Array);
        case ArrowTypeId.UInt64:
            return typeof(UInt64Array);

        case ArrowTypeId.String:
            return typeof(StringArray);
        case ArrowTypeId.Struct:
            return typeof(StructArray);
        case ArrowTypeId.Timestamp:
            return typeof(TimestampArray);
        case ArrowTypeId.Null:
            return typeof(NullArray);
        case ArrowTypeId.List:
            return typeof(ListArray);

        default:
            throw new InvalidCastException($"Cannot determine the array type for {dataType.Name}");
    }
}
}
}
58 changes: 49 additions & 9 deletions csharp/src/Drivers/BigQuery/BigQueryConnection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@
public class BigQueryConnection : AdbcConnection
{
readonly IReadOnlyDictionary<string, string> properties;
BigQueryClient? client;

Check warning on line 41 in csharp/src/Drivers/BigQuery/BigQueryConnection.cs

View workflow job for this annotation

GitHub Actions / C# ubuntu-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 41 in csharp/src/Drivers/BigQuery/BigQueryConnection.cs

View workflow job for this annotation

GitHub Actions / C# windows-2019

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 41 in csharp/src/Drivers/BigQuery/BigQueryConnection.cs

View workflow job for this annotation

GitHub Actions / C# macos-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.
GoogleCredential? credential;

Check warning on line 42 in csharp/src/Drivers/BigQuery/BigQueryConnection.cs

View workflow job for this annotation

GitHub Actions / C# ubuntu-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 42 in csharp/src/Drivers/BigQuery/BigQueryConnection.cs

View workflow job for this annotation

GitHub Actions / C# windows-2019

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 42 in csharp/src/Drivers/BigQuery/BigQueryConnection.cs

View workflow job for this annotation

GitHub Actions / C# macos-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

const string infoDriverName = "ADBC BigQuery Driver";
const string infoDriverVersion = "1.0.0";
Expand Down Expand Up @@ -221,7 +221,7 @@
new Int64Array.Builder().Build(),
new Int32Array.Builder().Build(),
new ListArray.Builder(StringType.Default).Build(),
CreateNestedListArray(new List<IArrowArray?>(){ entriesDataArray }, entryType)

Check warning on line 224 in csharp/src/Drivers/BigQuery/BigQueryConnection.cs

View workflow job for this annotation

GitHub Actions / C# ubuntu-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 224 in csharp/src/Drivers/BigQuery/BigQueryConnection.cs

View workflow job for this annotation

GitHub Actions / C# windows-2019

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 224 in csharp/src/Drivers/BigQuery/BigQueryConnection.cs

View workflow job for this annotation

GitHub Actions / C# macos-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.
};

DenseUnionArray infoValue = new DenseUnionArray(infoUnionType, arrayLength, childrenArrays, typeBuilder.Build(), offsetBuilder.Build(), nullCount);
Expand Down Expand Up @@ -258,7 +258,7 @@
string columnNamePattern)
{
StringArray.Builder catalogNameBuilder = new StringArray.Builder();
List<IArrowArray?> catalogDbSchemasValues = new List<IArrowArray?>();

Check warning on line 261 in csharp/src/Drivers/BigQuery/BigQueryConnection.cs

View workflow job for this annotation

GitHub Actions / C# ubuntu-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 261 in csharp/src/Drivers/BigQuery/BigQueryConnection.cs

View workflow job for this annotation

GitHub Actions / C# windows-2019

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 261 in csharp/src/Drivers/BigQuery/BigQueryConnection.cs

View workflow job for this annotation

GitHub Actions / C# macos-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.
string catalogRegexp = PatternToRegEx(catalogPattern);
PagedEnumerable<ProjectList, CloudProject> catalogs = this.client.ListProjects();

Expand Down Expand Up @@ -537,7 +537,7 @@
nullBitmapBuffer.Append(true);
length++;

if (depth == GetObjectsDepth.All)
if (depth == GetObjectsDepth.All || depth == GetObjectsDepth.Tables)
{
constraintColumnNamesValues.Add(GetConstraintColumnNames(
catalog, dbSchema, table, constraintName));
Expand Down Expand Up @@ -588,7 +588,8 @@

foreach (BigQueryRow row in result)
{
constraintColumnNamesBuilder.Append(row["column_name"].ToString());
string column = row["column_name"].ToString();
constraintColumnNamesBuilder.Append(column);
}

return constraintColumnNamesBuilder.Build();
Expand All @@ -607,17 +608,24 @@
ArrowBuffer.BitmapBuilder nullBitmapBuffer = new ArrowBuffer.BitmapBuilder();
int length = 0;

string query = string.Format("SELECT * FROM `{0}`.`{1}`.INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE WHERE table_name = '{2}' AND constraint_name = '{3}'",
Sanitize(catalog), Sanitize(dbSchema), Sanitize(table), Sanitize(constraintName));
// table_name = '{2}' AND
davidhcoe marked this conversation as resolved.
Show resolved Hide resolved
string query = string.Format("SELECT * FROM `{0}`.`{1}`.INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE WHERE constraint_name = '{2}'",
Sanitize(catalog), Sanitize(dbSchema), /*Sanitize(table),*/ Sanitize(constraintName));

BigQueryResults result = this.client.ExecuteQuery(query, parameters: null);

foreach (BigQueryRow row in result)
{
constraintFkCatalogBuilder.Append(row["constraint_catalog"].ToString());
constraintFkDbSchemaBuilder.Append(row["constraint_schema"].ToString());
constraintFkTableBuilder.Append(row["table_name"].ToString());
constraintFkColumnNameBuilder.Append(row["column_name"].ToString());
string constraint_catalog = row["constraint_catalog"].ToString();
string constraint_schema = row["constraint_schema"].ToString();
string table_name = row["table_name"].ToString();
string column_name = row["column_name"].ToString();

constraintFkCatalogBuilder.Append(constraint_catalog);
constraintFkDbSchemaBuilder.Append(constraint_schema);
constraintFkTableBuilder.Append(table_name);
constraintFkColumnNameBuilder.Append(column_name);

nullBitmapBuffer.Append(true);
length++;
}
Expand Down Expand Up @@ -683,8 +691,40 @@
return XdbcDataType.XdbcDataType_XDBC_VARBINARY;
case "NUMERIC" or "DECIMAL" or "BIGNUMERIC" or "BIGDECIMAL":
return XdbcDataType.XdbcDataType_XDBC_NUMERIC;

default:

int decimalMaxScale = 28;
davidhcoe marked this conversation as resolved.
Show resolved Hide resolved

if(type.StartsWith("NUMERIC("))
davidhcoe marked this conversation as resolved.
Show resolved Hide resolved
{
ParsedDecimalValues parsedDecimalValues = ParsePrecisionAndScale(type);

if (parsedDecimalValues.Scale <= decimalMaxScale)
return XdbcDataType.XdbcDataType_XDBC_DECIMAL;
else
return XdbcDataType.XdbcDataType_XDBC_VARCHAR;
}

if (type.StartsWith("BIGNUMERIC("))
{
if(bool.Parse(this.properties[BigQueryParameters.LargeDecimalsAsString]))
{
return XdbcDataType.XdbcDataType_XDBC_VARCHAR;
}
else
{
ParsedDecimalValues parsedDecimalValues = ParsePrecisionAndScale(type);

if (parsedDecimalValues.Scale <= decimalMaxScale)
return XdbcDataType.XdbcDataType_XDBC_DECIMAL;
else
return XdbcDataType.XdbcDataType_XDBC_VARCHAR;
}
}

if (type.StartsWith("STRUCT"))
return XdbcDataType.XdbcDataType_XDBC_VARCHAR;

return XdbcDataType.XdbcDataType_XDBC_UNKNOWN_TYPE;
}
}
Expand Down
34 changes: 21 additions & 13 deletions csharp/src/Drivers/BigQuery/BigQueryStatement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@
this.credential = credential;
}

public IReadOnlyDictionary<string, string>? Options { get; set; }

Check warning on line 49 in csharp/src/Drivers/BigQuery/BigQueryStatement.cs

View workflow job for this annotation

GitHub Actions / C# ubuntu-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 49 in csharp/src/Drivers/BigQuery/BigQueryStatement.cs

View workflow job for this annotation

GitHub Actions / C# windows-2019

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 49 in csharp/src/Drivers/BigQuery/BigQueryStatement.cs

View workflow job for this annotation

GitHub Actions / C# macos-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

public override QueryResult ExecuteQuery()
{
QueryOptions? queryOptions = ValidateOptions();

Check warning on line 53 in csharp/src/Drivers/BigQuery/BigQueryStatement.cs

View workflow job for this annotation

GitHub Actions / C# ubuntu-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 53 in csharp/src/Drivers/BigQuery/BigQueryStatement.cs

View workflow job for this annotation

GitHub Actions / C# windows-2019

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 53 in csharp/src/Drivers/BigQuery/BigQueryStatement.cs

View workflow job for this annotation

GitHub Actions / C# macos-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.
BigQueryJob job = this.client.CreateQueryJob(SqlQuery, null, queryOptions);
BigQueryResults results = job.GetQueryResults();

Expand Down Expand Up @@ -107,46 +107,54 @@
switch (field.Type)
{
case "INTEGER" or "INT64":
return Int64Type.Default;
return GetType(field, Int64Type.Default);
case "FLOAT" or "FLOAT64":
return DoubleType.Default;
return GetType(field, DoubleType.Default);
case "BOOL" or "BOOLEAN":
return BooleanType.Default;
return GetType(field, BooleanType.Default);
case "STRING":
return StringType.Default;
return GetType(field, StringType.Default);
case "BYTES":
return BinaryType.Default;
return GetType(field, BinaryType.Default);
case "DATETIME":
return TimestampType.Default;
return GetType(field, TimestampType.Default);
case "TIMESTAMP":
return TimestampType.Default;
return GetType(field, TimestampType.Default);
case "TIME":
return Time64Type.Default;
return GetType(field, Time64Type.Default);
case "DATE":
return Date64Type.Default;
return GetType(field, Date64Type.Default);
case "RECORD" or "STRUCT":
// it's a JSON string
return StringType.Default;
return GetType(field, StringType.Default);

// treat these values as strings
case "GEOGRAPHY" or "JSON":
return StringType.Default;
return GetType(field, StringType.Default);

// get schema cannot get precision and scale for NUMERIC or BIGNUMERIC types
// instead, the max values are returned from BigQuery
// see 'precision' on https://cloud.google.com/bigquery/docs/reference/rest/v2/tables
// and discussion in https://github.com/apache/arrow-adbc/pull/1192#discussion_r1365987279

case "NUMERIC" or "DECIMAL":
return new Decimal128Type(38, 9);
return GetType(field, new Decimal128Type(38, 9));

case "BIGNUMERIC" or "BIGDECIMAL":
return bool.Parse(this.Options[BigQueryParameters.LargeDecimalsAsString]) ? StringType.Default : new Decimal256Type(76, 38);
return bool.Parse(this.Options[BigQueryParameters.LargeDecimalsAsString]) ? GetType(field, StringType.Default) : GetType(field, new Decimal256Type(76, 38));

default: throw new InvalidOperationException($"{field.Type} cannot be translated");
}
}

/// <summary>
/// Wraps <paramref name="type"/> in a <see cref="ListType"/> when the BigQuery field's
/// mode is "REPEATED"; otherwise returns <paramref name="type"/> unchanged.
/// </summary>
/// <param name="field">The BigQuery field schema whose <c>Mode</c> is inspected.</param>
/// <param name="type">The Arrow type already chosen for the field's values.</param>
/// <returns>Either <paramref name="type"/> itself or a list type built from it.</returns>
private IArrowType GetType(TableFieldSchema field, IArrowType type)
{
    bool isRepeated = field.Mode == "REPEATED";

    return isRepeated ? new ListType(type) : type;
}

static IArrowReader ReadChunk(BigQueryReadClient readClient, string streamName)
{
// Ideally we wouldn't need to indirect through a stream, but the necessary APIs in Arrow
Expand All @@ -159,7 +167,7 @@
return new ArrowStreamReader(stream);
}

private QueryOptions? ValidateOptions()

Check warning on line 170 in csharp/src/Drivers/BigQuery/BigQueryStatement.cs

View workflow job for this annotation

GitHub Actions / C# ubuntu-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 170 in csharp/src/Drivers/BigQuery/BigQueryStatement.cs

View workflow job for this annotation

GitHub Actions / C# windows-2019

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 170 in csharp/src/Drivers/BigQuery/BigQueryStatement.cs

View workflow job for this annotation

GitHub Actions / C# macos-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.
{
if (this.Options == null || this.Options.Count == 0)
return null;
Expand Down Expand Up @@ -195,8 +203,8 @@
class MultiArrowReader : IArrowArrayStream
{
readonly Schema schema;
IEnumerator<IArrowReader>? readers;

Check warning on line 206 in csharp/src/Drivers/BigQuery/BigQueryStatement.cs

View workflow job for this annotation

GitHub Actions / C# ubuntu-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 206 in csharp/src/Drivers/BigQuery/BigQueryStatement.cs

View workflow job for this annotation

GitHub Actions / C# windows-2019

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 206 in csharp/src/Drivers/BigQuery/BigQueryStatement.cs

View workflow job for this annotation

GitHub Actions / C# macos-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.
IArrowReader? reader;

Check warning on line 207 in csharp/src/Drivers/BigQuery/BigQueryStatement.cs

View workflow job for this annotation

GitHub Actions / C# ubuntu-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 207 in csharp/src/Drivers/BigQuery/BigQueryStatement.cs

View workflow job for this annotation

GitHub Actions / C# windows-2019

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 207 in csharp/src/Drivers/BigQuery/BigQueryStatement.cs

View workflow job for this annotation

GitHub Actions / C# macos-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

public MultiArrowReader(Schema schema, IEnumerable<IArrowReader> readers)
{
Expand Down
20 changes: 1 addition & 19 deletions csharp/test/Apache.Arrow.Adbc.Tests/ClientTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -207,25 +207,7 @@ static void AssertTypeAndValue(

if (netType != null)
{
if (netType.BaseType.Name.Contains("PrimitiveArray") && value != null)
{
int length = Convert.ToInt32(value.GetType().GetProperty("Length").GetValue(value));

if (length > 0)
{
object internalValue = value.GetType().GetMethod("GetValue").Invoke(value, new object[] { 0 });

Assert.True(internalValue.GetType() == ctv.ExpectedNetType, $"{name} is {netType.Name} and not {ctv.ExpectedNetType.Name} in the reader for query [{query}]");
}
else
{
Console.WriteLine($"Could not validate the values inside of {netType.Name} because it is empty for query [{query}]");
}
}
else
{
Assert.True(netType == ctv.ExpectedNetType, $"{name} is {netType.Name} and not {ctv.ExpectedNetType.Name} in the reader for query [{query}]");
}
Assert.True(netType == ctv.ExpectedNetType, $"{name} is {netType.Name} and not {ctv.ExpectedNetType.Name} in the reader for query [{query}]");
}

if (value != null)
Expand Down
35 changes: 35 additions & 0 deletions csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcConstraint.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System.Collections.Generic;

namespace Apache.Arrow.Adbc.Tests.Metadata
{
/// <summary>
/// Test-side model of a single table constraint: its name, its type, the names of
/// its columns and the column-usage entries attached to it.
/// </summary>
public class AdbcConstraint
{
    /// <summary>
    /// The name of the constraint.
    /// </summary>
    public string Name { get; set; }

    /// <summary>
    /// The type of the constraint.
    /// NOTE(review): presumably mirrors the driver's constraint_type value — confirm.
    /// </summary>
    public string Type { get; set; }

    /// <summary>
    /// The column names of the constraint. Starts out as an empty list.
    /// </summary>
    public List<string> ColumnNames { get; set; } = new List<string>();

    /// <summary>
    /// The column-usage entries of the constraint. Starts out as an empty list.
    /// </summary>
    public List<AdbcUsageSchema> ColumnUsage { get; set; } = new List<AdbcUsageSchema>();
}
}
5 changes: 5 additions & 0 deletions csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcTable.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,10 @@ public class AdbcTable
/// List of columns associated with the table.
/// </summary>
public List<AdbcColumn> Columns { get; set; }

/// <summary>
/// The constraints associated with the table.
/// </summary>
public List<AdbcConstraint> Constraints { get; set; }
}
}
30 changes: 30 additions & 0 deletions csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcUsageSchema.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

namespace Apache.Arrow.Adbc.Tests.Metadata
{
/// <summary>
/// Test-side model of one column-usage entry, identified by a catalog, a schema,
/// a table and a column name.
/// NOTE(review): presumably mirrors one row of the driver's constraint column usage
/// data — confirm against the code that populates it.
/// </summary>
public class AdbcUsageSchema
{
    /// <summary>
    /// The catalog part of the entry.
    /// </summary>
    public string FkCatalog { get; set; }

    /// <summary>
    /// The database schema part of the entry.
    /// </summary>
    public string FkDbSchema { get; set; }

    /// <summary>
    /// The table part of the entry.
    /// </summary>
    public string FkTable { get; set; }

    /// <summary>
    /// The column name part of the entry.
    /// </summary>
    public string FkColumnName { get; set; }
}
}
Loading
Loading