From a24b0466cec3bbf01bae8033befc7cead0c38988 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 3 Nov 2023 17:24:24 -0400 Subject: [PATCH 01/30] Initial hacks --- c/driver/postgresql/postgres_copy_reader.h | 87 ++++++++++++++++++ .../postgresql/postgres_copy_reader_test.cc | 61 +++++++++++++ c/validation/adbc_validation.cc | 91 +++++++++++++++++++ c/validation/adbc_validation.h | 3 + c/validation/adbc_validation_util.cc | 13 ++- c/validation/adbc_validation_util.h | 15 +++ 6 files changed, 269 insertions(+), 1 deletion(-) diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index 0943635118..eedec2c83e 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -1217,6 +1217,90 @@ class PostgresCopyIntervalFieldWriter : public PostgresCopyFieldWriter { } }; + +// Inspiration for this taken from get_str_from_var in the pg source +// src/backend/utils/adt/numeric.c +class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { +public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + struct ArrowDecimal decimal; + // TODO: these need to be inferred from the schema not hard coded + constexpr int16_t precision = 10; + constexpr int16_t scale = 4; + ArrowDecimalInit(&decimal, 128, precision, scale); + ArrowArrayViewGetDecimalUnsafe(array_view_, index, &decimal); + + // TODO: this is only guaranteed to work up to a precision of 18 + // how do we handle higher? + int64_t int_val = ArrowDecimalGetIntUnsafe(&decimal); + + // logic adoped from postgres int64_to_numericvar function + constexpr uint16_t kNumericPos = 0x0000; + constexpr uint16_t kNumericNeg = 0x4000; + constexpr uint64_t kNBase = 10000; + // Number of decimal digits per Postgres digit + constexpr int kDecDigits = 4; + + constexpr int16_t var_digits = 20 / kDecDigits; + int16_t* buf = (int16_t*)std::malloc((var_digits + 1) * sizeof(int16_t)); + buf[0] = 0; // spare digit for rounding + + uint64_t uval; + int16_t sign; + if (int_val < 0) { + sign = kNumericNeg; + uval = -int_val; + } else { + sign = kNumericPos; + uval = int_val; + } + int dscale = 0; + + int16_t ndigits = 0; + int16_t weight; + if (int_val == 0) { + // TODO: change this + ndigits = 0; + weight = 0; + } + + + int16_t* ptr = buf + 1 + var_digits; + ndigits = 0; + do { + ptr--; + ndigits++; + const uint64_t newuval = uval / kNBase; + *ptr = uval - newuval * kNBase; + uval = newuval; + } while (uval); + weight = ndigits - 1; + + // TODO: ndigits should equal pg_digits length + int32_t field_size_bytes = sizeof(ndigits) + + sizeof(weight) + + sizeof(sign) + + sizeof(dscale) + + ndigits * sizeof(int16_t); + + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, ndigits, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, weight, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, sign, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, dscale, error)); + + // Not efficient but works for now + for (int16_t i = 0; i < ndigits; i++) { + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, ptr[i], error)); + } + + // TODO: RAII + std::free(buf); + + return ADBC_STATUS_OK; + } +}; + template class PostgresCopyDurationFieldWriter : public PostgresCopyFieldWriter { public: @@ -1362,6 +1446,9 @@ static inline ArrowErrorCode MakeCopyFieldWriter( case NANOARROW_TYPE_DOUBLE: *out = new PostgresCopyDoubleFieldWriter(); return NANOARROW_OK; + case NANOARROW_TYPE_DECIMAL128: + *out = new PostgresCopyNumericFieldWriter(); + return NANOARROW_OK; case NANOARROW_TYPE_BINARY: case NANOARROW_TYPE_STRING: case NANOARROW_TYPE_LARGE_STRING: diff --git a/c/driver/postgresql/postgres_copy_reader_test.cc b/c/driver/postgresql/postgres_copy_reader_test.cc index 00ba120480..e2ee987c66 100644 --- a/c/driver/postgresql/postgres_copy_reader_test.cc +++ b/c/driver/postgresql/postgres_copy_reader_test.cc @@ -680,6 +680,67 @@ TEST(PostgresCopyUtilsTest, PostgresCopyReadNumeric) { EXPECT_EQ(std::string(item.data, item.size_bytes), "inf"); } +// This buffer is similar to the read variant above but removes special values +// nan, ±inf as they are not supported via the Arrow Decimal types +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (NULL), (-123.456), +// ('0.00001234'), ('1.0000'), (123.456), (1000000)) AS drvd(col)) +// TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyNumericWrite[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, + 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0, 0x00, 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, + 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x04, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff}; + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteNumeric) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + constexpr int32_t size = 128; + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + + struct ArrowDecimal decimal1; + struct ArrowDecimal decimal2; + struct ArrowDecimal decimal3; + struct ArrowDecimal decimal4; + struct ArrowDecimal decimal5; + + ArrowDecimalInit(&decimal1, size, 10, 4); + ArrowDecimalSetInt(&decimal1, -1234560); + ArrowDecimalInit(&decimal2, size, 10, 4); + ArrowDecimalSetInt(&decimal2, 1234); + ArrowDecimalInit(&decimal3, size, 10, 4); + ArrowDecimalSetInt(&decimal3, 10000); + ArrowDecimalInit(&decimal4, size, 10, 4); + ArrowDecimalSetInt(&decimal4, 1234560); + ArrowDecimalInit(&decimal5, size, 10, 0); + ArrowDecimalSetInt(&decimal5, 1000000); + + const std::vector> values = { + std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5}; + + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", type}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, + &na_error, values), ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyNumericWrite) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyNumericWrite[i]); + } +} + // COPY (SELECT CAST(col AS TIMESTAMP) FROM ( VALUES ('1900-01-01 12:34:56'), // ('2100-01-01 12:34:56'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT BINARY); static uint8_t kTestPgCopyTimestamp[] = { diff --git a/c/validation/adbc_validation.cc b/c/validation/adbc_validation.cc index d30aa0a979..62a92b6910 100644 --- a/c/validation/adbc_validation.cc +++ b/c/validation/adbc_validation.cc @@ -1528,6 +1528,97 @@ void StatementTest::TestSqlIngestFloat64() { ASSERT_NO_FATAL_FAILURE(TestSqlIngestNumericType(NANOARROW_TYPE_DOUBLE)); } +// For full coverage, ensure that this contains Decimal examples that: +// - Have >= four zeroes to the left of the decimal point +// - Have >= four zeroes to the right of the decimal point +// - Have >= four trailing zeroes to the right of the decimal point +// - Have >= four leading zeroes before the first digit to the right of the decimal point +// - Is < 0 (negative) +// - The arrow Decimal implementations do not support special values nan, ±inf +void StatementTest::TestSqlIngestDecimal128() { + if (!quirks()->supports_bulk_ingest(ADBC_INGEST_OPTION_MODE_CREATE)) { + GTEST_SKIP(); + } + + ASSERT_THAT(quirks()->DropTable(&connection, "bulk_ingest", &error), + IsOkStatus(&error)); + + Handle schema; + Handle array; + struct ArrowError na_error; + constexpr int32_t size = 128; + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + + struct ArrowDecimal decimal1; + struct ArrowDecimal decimal2; + struct ArrowDecimal decimal3; + struct ArrowDecimal decimal4; + struct ArrowDecimal decimal5; + + ArrowDecimalInit(&decimal1, size, 10, 4); + ArrowDecimalSetInt(&decimal1, -1234560); + ArrowDecimalInit(&decimal2, size, 10, 4); + ArrowDecimalSetInt(&decimal2, 1234); + ArrowDecimalInit(&decimal3, size, 10, 4); + ArrowDecimalSetInt(&decimal3, 10000); + ArrowDecimalInit(&decimal4, size, 10, 4); + ArrowDecimalSetInt(&decimal4, 1234560); + ArrowDecimalInit(&decimal5, size, 10, 0); + ArrowDecimalSetInt(&decimal5, 1000000); + + const std::vector> values = { + std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5}; + + ASSERT_THAT(MakeSchema(&schema.value, {{"col", type}}), IsOkErrno()); + ASSERT_THAT(MakeBatch(&schema.value, &array.value, + &na_error, values), IsOkErrno()); + + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementSetOption(&statement, ADBC_INGEST_OPTION_TARGET_TABLE, + "bulk_ingest", &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementBind(&statement, &array.value, &schema.value, &error), + IsOkStatus(&error)); + + int64_t rows_affected = 0; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, &rows_affected, &error), + IsOkStatus(&error)); + ASSERT_THAT(rows_affected, + ::testing::AnyOf(::testing::Eq(values.size()), ::testing::Eq(-1))); + + ASSERT_THAT(AdbcStatementSetSqlQuery( + &statement, + "SELECT * FROM bulk_ingest ORDER BY \"col\" ASC NULLS FIRST", &error), + IsOkStatus(&error)); + { + StreamReader reader; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, + &reader.rows_affected, &error), + IsOkStatus(&error)); + ASSERT_THAT(reader.rows_affected, + ::testing::AnyOf(::testing::Eq(values.size()), ::testing::Eq(-1))); + + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ArrowType round_trip_type = quirks()->IngestSelectRoundTripType(type); + ASSERT_NO_FATAL_FAILURE( + CompareSchema(&reader.schema.value, {{"col", round_trip_type, NULLABLE}})); + + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NE(nullptr, reader.array->release); + ASSERT_EQ(values.size(), reader.array->length); + ASSERT_EQ(1, reader.array->n_children); + + if (round_trip_type == type) { + ASSERT_NO_FATAL_FAILURE( + CompareArray(reader.array_view->children[0], values)); + } + + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_EQ(nullptr, reader.array->release); + } + ASSERT_THAT(AdbcStatementRelease(&statement, &error), IsOkStatus(&error)); +} + void StatementTest::TestSqlIngestString() { ASSERT_NO_FATAL_FAILURE(TestSqlIngestType( NANOARROW_TYPE_STRING, {std::nullopt, "", "", "1234", "例"}, false)); diff --git a/c/validation/adbc_validation.h b/c/validation/adbc_validation.h index 874d9a0584..1312c7541a 100644 --- a/c/validation/adbc_validation.h +++ b/c/validation/adbc_validation.h @@ -328,6 +328,9 @@ class StatementTest { void TestSqlIngestFloat32(); void TestSqlIngestFloat64(); + // Decmial + void TestSqlIngestDecimal128(); + // Strings void TestSqlIngestString(); void TestSqlIngestLargeString(); diff --git a/c/validation/adbc_validation_util.cc b/c/validation/adbc_validation_util.cc index b3ca7d5e9d..96c6b8a48a 100644 --- a/c/validation/adbc_validation_util.cc +++ b/c/validation/adbc_validation_util.cc @@ -141,7 +141,18 @@ int MakeSchema(struct ArrowSchema* schema, const std::vector& field CHECK_ERRNO(ArrowSchemaSetTypeStruct(schema, fields.size())); size_t i = 0; for (const SchemaField& field : fields) { - CHECK_ERRNO(ArrowSchemaSetType(schema->children[i], field.type)); + switch (field.type) { + case NANOARROW_TYPE_DECIMAL128: + // TODO: don't hardcore 10, 4 + CHECK_ERRNO(AdbcNsArrowSchemaSetTypeDecimal(schema->children[i], + field.type, + 10, + 4)); + break; + default: + CHECK_ERRNO(ArrowSchemaSetType(schema->children[i], field.type)); + } + CHECK_ERRNO(ArrowSchemaSetName(schema->children[i], field.name.c_str())); if (!field.nullable) { schema->children[i]->flags &= ~ARROW_FLAG_NULLABLE; diff --git a/c/validation/adbc_validation_util.h b/c/validation/adbc_validation_util.h index da71e0d9e8..ef048e039c 100644 --- a/c/validation/adbc_validation_util.h +++ b/c/validation/adbc_validation_util.h @@ -283,6 +283,10 @@ int MakeArray(struct ArrowArray* parent, struct ArrowArray* array, if (int errno_res = ArrowArrayAppendInterval(array, *v); errno_res != 0) { return errno_res; } + } else if constexpr (std::is_same::value) { + if (int errno_res = ArrowArrayAppendDecimal(array, *v); errno_res != 0) { + return errno_res; + } } else { static_assert(!sizeof(T), "Not yet implemented"); return ENOTSUP; @@ -412,6 +416,17 @@ void CompareArray(struct ArrowArrayView* array, ASSERT_EQ(interval.months, (*v)->months); ASSERT_EQ(interval.days, (*v)->days); ASSERT_EQ(interval.ns, (*v)->ns); + } else if constexpr (std::is_same::value) { + ASSERT_NE(array->buffer_views[1].data.data, nullptr); + struct ArrowDecimal decimal; + // For now assuming Decimal128 so set as bitwidth + ArrowDecimalInit(&decimal, 128, (*v)->precision, (*v)->scale); + ArrowArrayViewGetDecimalUnsafe(array, i, &decimal); + + ASSERT_EQ(decimal.n_words, (*v)->n_words); + // For now assuming Decimal128 so only need to check first two words + ASSERT_EQ(decimal.words[0], (*v)->words[0]); + ASSERT_EQ(decimal.words[1], (*v)->words[1]); } else { static_assert(!sizeof(T), "Not yet implemented"); } From c5dfd05500233f563e8a0ae63b32e60abd1249d5 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 13 Nov 2023 14:26:36 -0500 Subject: [PATCH 02/30] feat(c/driver/postgresql): Support for writing DECIMAL128 --- c/driver/postgresql/postgres_copy_reader.h | 82 ++++++++----------- .../postgresql/postgres_copy_reader_test.cc | 18 ++-- c/driver/postgresql/postgresql_test.cc | 2 + c/driver/postgresql/statement.cc | 7 ++ c/validation/adbc_validation.cc | 35 +++++--- c/validation/adbc_validation.h | 1 + c/validation/adbc_validation_util.cc | 6 +- 7 files changed, 79 insertions(+), 72 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index 9f9290e676..d22811f8f5 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -1225,56 +1225,48 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { struct ArrowDecimal decimal; // TODO: these need to be inferred from the schema not hard coded - constexpr int16_t precision = 10; - constexpr int16_t scale = 4; + constexpr int16_t precision = 19; + constexpr int16_t scale = 8; ArrowDecimalInit(&decimal, 128, precision, scale); ArrowArrayViewGetDecimalUnsafe(array_view_, index, &decimal); - - // TODO: this is only guaranteed to work up to a precision of 18 - // how do we handle higher? - int64_t int_val = ArrowDecimalGetIntUnsafe(&decimal); - - // logic adoped from postgres int64_to_numericvar function constexpr uint16_t kNumericPos = 0x0000; constexpr uint16_t kNumericNeg = 0x4000; - constexpr uint64_t kNBase = 10000; + constexpr int64_t kNBase = 10000; // Number of decimal digits per Postgres digit constexpr int kDecDigits = 4; - constexpr int16_t var_digits = 20 / kDecDigits; - int16_t* buf = (int16_t*)std::malloc((var_digits + 1) * sizeof(int16_t)); - buf[0] = 0; // spare digit for rounding - - uint64_t uval; - int16_t sign; - if (int_val < 0) { - sign = kNumericNeg; - uval = -int_val; - } else { - sign = kNumericPos; - uval = int_val; - } - int dscale = 0; - - int16_t ndigits = 0; - int16_t weight; - if (int_val == 0) { - // TODO: change this - ndigits = 0; - weight = 0; + // TODO: need some kind of bounds check on this + int64_t decimal_int = ArrowDecimalGetIntUnsafe(&decimal); + // TODO: is -INT64_MIN possible? If so how do we handle? + if (decimal_int < 0) { + decimal_int = -decimal_int; + } + std::vector pg_digits; + + int16_t weight = -1; + constexpr size_t nloops = (precision + kDecDigits - 1) / kDecDigits; + for (size_t i = 0; i < nloops; i++) { + const int64_t rem = decimal_int % kNBase; + // TODO: postgres seems to pack records to the left of a decimal place + // internally, so 1000000.0 would be sent as one digit of 100 with + // a weight of 1 (there are weight + 1 pg digits to the left of a decimal place) + // Here we still send two digits of 100 and 0000 + pg_digits.insert(pg_digits.begin(), rem); + + // TODO: how does pg deal with words when integer and decimal part are sent + // in same word? + decimal_int /= kNBase; + if (i >= scale / kDecDigits) { + weight++; + if (decimal_int == 0) { + break; + } + } } - - int16_t* ptr = buf + 1 + var_digits; - ndigits = 0; - do { - ptr--; - ndigits++; - const uint64_t newuval = uval / kNBase; - *ptr = uval - newuval * kNBase; - uval = newuval; - } while (uval); - weight = ndigits - 1; + int16_t ndigits = pg_digits.size(); + const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? kNumericPos : kNumericNeg; + const int16_t dscale = scale; // TODO: ndigits should equal pg_digits length int32_t field_size_bytes = sizeof(ndigits) @@ -1289,14 +1281,10 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, sign, error)); NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, dscale, error)); - // Not efficient but works for now - for (int16_t i = 0; i < ndigits; i++) { - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, ptr[i], error)); + for (auto pg_digit : pg_digits) { + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, pg_digit, error)); } - // TODO: RAII - std::free(buf); - return ADBC_STATUS_OK; } }; diff --git a/c/driver/postgresql/postgres_copy_reader_test.cc b/c/driver/postgresql/postgres_copy_reader_test.cc index e2ee987c66..ab27093e27 100644 --- a/c/driver/postgresql/postgres_copy_reader_test.cc +++ b/c/driver/postgresql/postgres_copy_reader_test.cc @@ -708,16 +708,16 @@ TEST(PostgresCopyUtilsTest, PostgresCopyWriteNumeric) { struct ArrowDecimal decimal4; struct ArrowDecimal decimal5; - ArrowDecimalInit(&decimal1, size, 10, 4); - ArrowDecimalSetInt(&decimal1, -1234560); - ArrowDecimalInit(&decimal2, size, 10, 4); + ArrowDecimalInit(&decimal1, size, 19, 8); + ArrowDecimalSetInt(&decimal1, -12345600000); + ArrowDecimalInit(&decimal2, size, 19, 8); ArrowDecimalSetInt(&decimal2, 1234); - ArrowDecimalInit(&decimal3, size, 10, 4); - ArrowDecimalSetInt(&decimal3, 10000); - ArrowDecimalInit(&decimal4, size, 10, 4); - ArrowDecimalSetInt(&decimal4, 1234560); - ArrowDecimalInit(&decimal5, size, 10, 0); - ArrowDecimalSetInt(&decimal5, 1000000); + ArrowDecimalInit(&decimal3, size, 19, 8); + ArrowDecimalSetInt(&decimal3, 100000000); + ArrowDecimalInit(&decimal4, size, 19, 8); + ArrowDecimalSetInt(&decimal4, 12345600000); + ArrowDecimalInit(&decimal5, size, 19, 8); + ArrowDecimalSetInt(&decimal5, 100000000000000); const std::vector> values = { std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5}; diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc index e1f95a4901..70e8a153cf 100644 --- a/c/driver/postgresql/postgresql_test.cc +++ b/c/driver/postgresql/postgresql_test.cc @@ -95,6 +95,8 @@ class PostgresQuirks : public adbc_validation::DriverQuirks { return NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO; case NANOARROW_TYPE_LARGE_STRING: return NANOARROW_TYPE_STRING; + case NANOARROW_TYPE_DECIMAL128: + return NANOARROW_TYPE_STRING; default: return ingest_type; } diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index 6f1bb9bd54..5f2217aa62 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -210,6 +210,10 @@ struct BindStream { type_id = PostgresTypeId::kInterval; param_lengths[i] = 16; break; + case ArrowType::NANOARROW_TYPE_DECIMAL128: + type_id = PostgresTypeId::kNumeric; + param_lengths[i] = 0; + break; case ArrowType::NANOARROW_TYPE_DICTIONARY: { struct ArrowSchemaView value_view; CHECK_NA(INTERNAL, @@ -1056,6 +1060,9 @@ AdbcStatusCode PostgresStatement::CreateBulkTable( case ArrowType::NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: create += " INTERVAL"; break; + case ArrowType::NANOARROW_TYPE_DECIMAL128: + create += " DECIMAL"; + break; case ArrowType::NANOARROW_TYPE_DICTIONARY: { struct ArrowSchemaView value_view; CHECK_NA(INTERNAL, diff --git a/c/validation/adbc_validation.cc b/c/validation/adbc_validation.cc index 62a92b6910..0fcd9573cf 100644 --- a/c/validation/adbc_validation.cc +++ b/c/validation/adbc_validation.cc @@ -1555,16 +1555,16 @@ void StatementTest::TestSqlIngestDecimal128() { struct ArrowDecimal decimal4; struct ArrowDecimal decimal5; - ArrowDecimalInit(&decimal1, size, 10, 4); - ArrowDecimalSetInt(&decimal1, -1234560); - ArrowDecimalInit(&decimal2, size, 10, 4); + ArrowDecimalInit(&decimal1, size, 19, 8); + ArrowDecimalSetInt(&decimal1, -12345600000); + ArrowDecimalInit(&decimal2, size, 19, 8); ArrowDecimalSetInt(&decimal2, 1234); - ArrowDecimalInit(&decimal3, size, 10, 4); - ArrowDecimalSetInt(&decimal3, 10000); - ArrowDecimalInit(&decimal4, size, 10, 4); - ArrowDecimalSetInt(&decimal4, 1234560); - ArrowDecimalInit(&decimal5, size, 10, 0); - ArrowDecimalSetInt(&decimal5, 1000000); + ArrowDecimalInit(&decimal3, size, 19, 8); + ArrowDecimalSetInt(&decimal3, 100000000); + ArrowDecimalInit(&decimal4, size, 19, 8); + ArrowDecimalSetInt(&decimal4, 12345600000); + ArrowDecimalInit(&decimal5, size, 19, 8); + ArrowDecimalSetInt(&decimal5, 100000000000000); const std::vector> values = { std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5}; @@ -1608,10 +1608,19 @@ void StatementTest::TestSqlIngestDecimal128() { ASSERT_EQ(values.size(), reader.array->length); ASSERT_EQ(1, reader.array->n_children); - if (round_trip_type == type) { - ASSERT_NO_FATAL_FAILURE( - CompareArray(reader.array_view->children[0], values)); - } + // Currently postgres roundtrips to string, but in the future we should + // roundtrip to a decimal + // + //if (round_trip_type == type) { + // ASSERT_NO_FATAL_FAILURE( + // CompareArray(reader.array_view->children[0], values)); + //} + + const std::vector> str_values = { + std::nullopt, "-123.45600000", "0.00001234", "1.00000000", "123.45600000", + "1000000.00000000"}; + ASSERT_NO_FATAL_FAILURE( + CompareArray(reader.array_view->children[0], str_values)); ASSERT_NO_FATAL_FAILURE(reader.Next()); ASSERT_EQ(nullptr, reader.array->release); diff --git a/c/validation/adbc_validation.h b/c/validation/adbc_validation.h index 1312c7541a..44b9e98316 100644 --- a/c/validation/adbc_validation.h +++ b/c/validation/adbc_validation.h @@ -437,6 +437,7 @@ class StatementTest { TEST_F(FIXTURE, SqlIngestUInt64) { TestSqlIngestUInt64(); } \ TEST_F(FIXTURE, SqlIngestFloat32) { TestSqlIngestFloat32(); } \ TEST_F(FIXTURE, SqlIngestFloat64) { TestSqlIngestFloat64(); } \ + TEST_F(FIXTURE, SqlIngestDecimal128) { TestSqlIngestDecimal128(); } \ TEST_F(FIXTURE, SqlIngestString) { TestSqlIngestString(); } \ TEST_F(FIXTURE, SqlIngestLargeString) { TestSqlIngestLargeString(); } \ TEST_F(FIXTURE, SqlIngestBinary) { TestSqlIngestBinary(); } \ diff --git a/c/validation/adbc_validation_util.cc b/c/validation/adbc_validation_util.cc index 96c6b8a48a..b72bcb69c6 100644 --- a/c/validation/adbc_validation_util.cc +++ b/c/validation/adbc_validation_util.cc @@ -143,11 +143,11 @@ int MakeSchema(struct ArrowSchema* schema, const std::vector& field for (const SchemaField& field : fields) { switch (field.type) { case NANOARROW_TYPE_DECIMAL128: - // TODO: don't hardcore 10, 4 + // TODO: don't hardcore 19, 8 CHECK_ERRNO(AdbcNsArrowSchemaSetTypeDecimal(schema->children[i], field.type, - 10, - 4)); + 19, + 8)); break; default: CHECK_ERRNO(ArrowSchemaSetType(schema->children[i], field.type)); From a091bcdf4441b56a02cb795e2f3b6d209bf37355 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 13 Nov 2023 14:58:25 -0500 Subject: [PATCH 03/30] removed TODO --- c/driver/postgresql/postgres_copy_reader.h | 1 - 1 file changed, 1 deletion(-) diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index d22811f8f5..71e084d09b 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -1268,7 +1268,6 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? kNumericPos : kNumericNeg; const int16_t dscale = scale; - // TODO: ndigits should equal pg_digits length int32_t field_size_bytes = sizeof(ndigits) + sizeof(weight) + sizeof(sign) From 61eb3cc3fca6428d43207c59b5608f31113b60d1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 13 Nov 2023 20:43:45 -0500 Subject: [PATCH 04/30] trailing decimals --- c/driver/postgresql/postgres_copy_reader.h | 48 +++++++++++++------ .../postgresql/postgres_copy_reader_test.cc | 8 ++-- c/validation/adbc_validation.cc | 3 +- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index 71e084d09b..d915e1b966 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -1236,37 +1236,57 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { constexpr int kDecDigits = 4; // TODO: need some kind of bounds check on this - int64_t decimal_int = ArrowDecimalGetIntUnsafe(&decimal); + const int64_t decimal_int = ArrowDecimalGetIntUnsafe(&decimal); + // TODO: is -INT64_MIN possible? If so how do we handle? - if (decimal_int < 0) { - decimal_int = -decimal_int; - } + int64_t value = decimal_int < 0 ? -decimal_int : decimal_int; std::vector pg_digits; - int16_t weight = -1; + int16_t weight = -(scale / kDecDigits); + int16_t dscale = scale; constexpr size_t nloops = (precision + kDecDigits - 1) / kDecDigits; + bool seen_decimal = scale == 0; + bool truncating_trailing_zeros = true; + for (size_t i = 0; i < nloops; i++) { - const int64_t rem = decimal_int % kNBase; + const int64_t rem = value % kNBase; // TODO: postgres seems to pack records to the left of a decimal place // internally, so 1000000.0 would be sent as one digit of 100 with // a weight of 1 (there are weight + 1 pg digits to the left of a decimal place) // Here we still send two digits of 100 and 0000 - pg_digits.insert(pg_digits.begin(), rem); + if (rem == 0) { + if (!seen_decimal && truncating_trailing_zeros) { + dscale -= kDecDigits; + } + } else { + pg_digits.insert(pg_digits.begin(), rem); + if (!seen_decimal && truncating_trailing_zeros) { + if (rem % 1000 == 0) { + dscale -= 3; + } else if (rem % 100 == 0) { + dscale -= 2; + } else if (rem % 10 == 0) { + dscale -= 1; + } + } + truncating_trailing_zeros = false; + } // TODO: how does pg deal with words when integer and decimal part are sent // in same word? - decimal_int /= kNBase; - if (i >= scale / kDecDigits) { - weight++; - if (decimal_int == 0) { - break; - } + value /= kNBase; + if (value == 0) { + break; + } + weight++; + + if (i >= scale / kDecDigits - 1) { + seen_decimal = true; } } int16_t ndigits = pg_digits.size(); const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? kNumericPos : kNumericNeg; - const int16_t dscale = scale; int32_t field_size_bytes = sizeof(ndigits) + sizeof(weight) diff --git a/c/driver/postgresql/postgres_copy_reader_test.cc b/c/driver/postgresql/postgres_copy_reader_test.cc index ab27093e27..354a02bf00 100644 --- a/c/driver/postgresql/postgres_copy_reader_test.cc +++ b/c/driver/postgresql/postgres_copy_reader_test.cc @@ -683,15 +683,15 @@ TEST(PostgresCopyUtilsTest, PostgresCopyReadNumeric) { // This buffer is similar to the read variant above but removes special values // nan, ±inf as they are not supported via the Arrow Decimal types // COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (NULL), (-123.456), -// ('0.00001234'), ('1.0000'), (123.456), (1000000)) AS drvd(col)) +// ('0.00001234'), (1.0000), (123.456), (1000000)) AS drvd(col)) // TO STDOUT WITH (FORMAT binary); static uint8_t kTestPgCopyNumericWrite[] = { 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, - 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0, 0x00, 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, + 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x04, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff}; @@ -737,7 +737,7 @@ TEST(PostgresCopyUtilsTest, PostgresCopyWriteNumeric) { constexpr size_t buf_size = sizeof(kTestPgCopyNumericWrite) - 2; ASSERT_EQ(buf.size_bytes, buf_size); for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyNumericWrite[i]); + ASSERT_EQ(buf.data[i], kTestPgCopyNumericWrite[i]) << " at position " << i; } } diff --git a/c/validation/adbc_validation.cc b/c/validation/adbc_validation.cc index 0fcd9573cf..c975c105ae 100644 --- a/c/validation/adbc_validation.cc +++ b/c/validation/adbc_validation.cc @@ -1617,8 +1617,7 @@ void StatementTest::TestSqlIngestDecimal128() { //} const std::vector> str_values = { - std::nullopt, "-123.45600000", "0.00001234", "1.00000000", "123.45600000", - "1000000.00000000"}; + std::nullopt, "-123.456", "0.00001234", "1", "123.456", "1000000"}; ASSERT_NO_FATAL_FAILURE( CompareArray(reader.array_view->children[0], str_values)); From 078de30921e5882eeed7900429fd99613d7665d9 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 28 Nov 2023 12:49:34 -0800 Subject: [PATCH 05/30] more decimal hacks --- c/cmake_modules/AdbcDefines.cmake | 7 +----- c/driver/postgresql/postgres_copy_reader.h | 14 +++++++---- c/validation/adbc_validation.cc | 27 ++++++++++++++++------ 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/c/cmake_modules/AdbcDefines.cmake b/c/cmake_modules/AdbcDefines.cmake index 5cc2eaa60b..79224df196 100644 --- a/c/cmake_modules/AdbcDefines.cmake +++ b/c/cmake_modules/AdbcDefines.cmake @@ -75,12 +75,7 @@ if(MSVC) elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set(ADBC_C_CXX_FLAGS_CHECKIN - -Wall - -Wextra - -Wpedantic - -Werror - -Wno-unused-parameter) + set(ADBC_C_CXX_FLAGS_CHECKIN -Wall -Wextra -Werror -Wno-unused-parameter) set(ADBC_C_CXX_FLAGS_PRODUCTION -Wall) else() message(WARNING "Unknown compiler: ${CMAKE_CXX_COMPILER_ID}") diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index d915e1b966..1034cf3c63 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -1225,7 +1225,7 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { struct ArrowDecimal decimal; // TODO: these need to be inferred from the schema not hard coded - constexpr int16_t precision = 19; + constexpr int16_t precision = 38; constexpr int16_t scale = 8; ArrowDecimalInit(&decimal, 128, precision, scale); ArrowArrayViewGetDecimalUnsafe(array_view_, index, &decimal); @@ -1235,11 +1235,15 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { // Number of decimal digits per Postgres digit constexpr int kDecDigits = 4; - // TODO: need some kind of bounds check on this - const int64_t decimal_int = ArrowDecimalGetIntUnsafe(&decimal); + // This assumes that we are dealing with Decimal128 + // more work required for 256 support + uint8_t bytes_tmp[32]; + ArrowDecimalGetBytes(&decimal, bytes_tmp); + __int128 tmp; + std::memcpy(&tmp, bytes_tmp, sizeof(tmp)); // TODO: is -INT64_MIN possible? If so how do we handle? - int64_t value = decimal_int < 0 ? -decimal_int : decimal_int; + unsigned __int128 value = tmp < 0 ? -tmp : tmp; std::vector pg_digits; int16_t weight = -(scale / kDecDigits); @@ -1249,7 +1253,7 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { bool truncating_trailing_zeros = true; for (size_t i = 0; i < nloops; i++) { - const int64_t rem = value % kNBase; + const unsigned __int128 rem = value % kNBase; // TODO: postgres seems to pack records to the left of a decimal place // internally, so 1000000.0 would be sent as one digit of 100 with // a weight of 1 (there are weight + 1 pg digits to the left of a decimal place) diff --git a/c/validation/adbc_validation.cc b/c/validation/adbc_validation.cc index c975c105ae..324c837772 100644 --- a/c/validation/adbc_validation.cc +++ b/c/validation/adbc_validation.cc @@ -1554,20 +1554,32 @@ void StatementTest::TestSqlIngestDecimal128() { struct ArrowDecimal decimal3; struct ArrowDecimal decimal4; struct ArrowDecimal decimal5; + struct ArrowDecimal decimal6; - ArrowDecimalInit(&decimal1, size, 19, 8); + ArrowDecimalInit(&decimal1, size, 38, 8); ArrowDecimalSetInt(&decimal1, -12345600000); - ArrowDecimalInit(&decimal2, size, 19, 8); + ArrowDecimalInit(&decimal2, size, 38, 8); ArrowDecimalSetInt(&decimal2, 1234); - ArrowDecimalInit(&decimal3, size, 19, 8); + ArrowDecimalInit(&decimal3, size, 38, 8); ArrowDecimalSetInt(&decimal3, 100000000); - ArrowDecimalInit(&decimal4, size, 19, 8); + ArrowDecimalInit(&decimal4, size, 38, 8); ArrowDecimalSetInt(&decimal4, 12345600000); - ArrowDecimalInit(&decimal5, size, 19, 8); + ArrowDecimalInit(&decimal5, size, 38, 8); ArrowDecimalSetInt(&decimal5, 100000000000000); + ArrowDecimalInit(&decimal6, size, 38, 8); + // 2342394230592.232349023094 in little endian + uint8_t le_data[16] = { + 0x76, 0xbb, 0xc8, 0x2c, 0x1c, 0x2b, 0x18, 0x72, + 0x05, 0xf0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00}; + ArrowDecimalSetBytes(&decimal6, le_data); + // + const uint64_t large_decimal[2] = {1, 2345678901234567890}; + uint8_t large_decimal_bytes[16]; + std::memcpy(&large_decimal_bytes, large_decimal, sizeof(large_decimal_bytes)); + ArrowDecimalSetBytes(&decimal6, large_decimal_bytes); const std::vector> values = { - std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5}; + std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5, &decimal6}; ASSERT_THAT(MakeSchema(&schema.value, {{"col", type}}), IsOkErrno()); ASSERT_THAT(MakeBatch(&schema.value, &array.value, @@ -1617,7 +1629,8 @@ void StatementTest::TestSqlIngestDecimal128() { //} const std::vector> str_values = { - std::nullopt, "-123.456", "0.00001234", "1", "123.456", "1000000"}; + std::nullopt, "-123.456", "0.00001234", "1", "123.456", "1000000", + "2342394230592.232349023094"}; ASSERT_NO_FATAL_FAILURE( CompareArray(reader.array_view->children[0], str_values)); From bf8ed7b218ee60df6f910714a2578a7616b2f73c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 28 Nov 2023 20:15:23 -0800 Subject: [PATCH 06/30] working for positive decimal values --- c/driver/postgresql/postgres_copy_reader.h | 84 +++++++++++++++------- c/validation/adbc_validation.cc | 13 ++-- 2 files changed, 64 insertions(+), 33 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index 1034cf3c63..888335d0f7 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -1231,7 +1231,6 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { ArrowArrayViewGetDecimalUnsafe(array_view_, index, &decimal); constexpr uint16_t kNumericPos = 0x0000; constexpr uint16_t kNumericNeg = 0x4000; - constexpr int64_t kNBase = 10000; // Number of decimal digits per Postgres digit constexpr int kDecDigits = 4; @@ -1239,55 +1238,56 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { // more work required for 256 support uint8_t bytes_tmp[32]; ArrowDecimalGetBytes(&decimal, bytes_tmp); - __int128 tmp; - std::memcpy(&tmp, bytes_tmp, sizeof(tmp)); + uint64_t tmp[2]; + std::memcpy(tmp, bytes_tmp, sizeof(tmp)); - // TODO: is -INT64_MIN possible? If so how do we handle? - unsigned __int128 value = tmp < 0 ? -tmp : tmp; std::vector pg_digits; int16_t weight = -(scale / kDecDigits); int16_t dscale = scale; - constexpr size_t nloops = (precision + kDecDigits - 1) / kDecDigits; bool seen_decimal = scale == 0; bool truncating_trailing_zeros = true; - for (size_t i = 0; i < nloops; i++) { - const unsigned __int128 rem = value % kNBase; - // TODO: postgres seems to pack records to the left of a decimal place - // internally, so 1000000.0 would be sent as one digit of 100 with - // a weight of 1 (there are weight + 1 pg digits to the left of a decimal place) - // Here we still send two digits of 100 and 0000 - if (rem == 0) { + const std::string decimal_string = DecimalBytesToString(tmp, 2); + const std::string_view vw = decimal_string; + int digits_remaining = vw.size(); + do { + const int start_pos = digits_remaining < kDecDigits ? + 0 : digits_remaining - kDecDigits; + // TODO: would be great to use a string_view here but wasn't sure + // how to make that work with stoi + const size_t len = digits_remaining < 4 ? digits_remaining : kDecDigits; + std::string substr{vw.substr(start_pos, len)}; + //size_t ndigits; TODO: maybe should use ndigits output + int16_t val = static_cast(std::stoi(substr.data())); + + if (val == 0) { if (!seen_decimal && truncating_trailing_zeros) { dscale -= kDecDigits; } } else { - pg_digits.insert(pg_digits.begin(), rem); + pg_digits.insert(pg_digits.begin(), val); if (!seen_decimal && truncating_trailing_zeros) { - if (rem % 1000 == 0) { + if (val % 1000 == 0) { dscale -= 3; - } else if (rem % 100 == 0) { + } else if (val % 100 == 0) { dscale -= 2; - } else if (rem % 10 == 0) { + } else if (val % 10 == 0) { dscale -= 1; } } truncating_trailing_zeros = false; } - - // TODO: how does pg deal with words when integer and decimal part are sent - // in same word? - value /= kNBase; - if (value == 0) { + digits_remaining -= kDecDigits; + if (digits_remaining <= 0) { break; } weight++; - if (i >= scale / kDecDigits - 1) { + if (start_pos <= static_cast(vw.size()) - scale) { seen_decimal = true; } - } + } while (true); int16_t ndigits = pg_digits.size(); const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? kNumericPos : kNumericNeg; @@ -1310,6 +1310,42 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { return ADBC_STATUS_OK; } + +private: + std::string DecimalBytesToString(const uint64_t* decimal_bytes, size_t size) { + // Basic approach adopted from https://stackoverflow.com/a/8023862/621736 + // This currently only works with decimal128 + char s[38]; + uint64_t buf[2]; + + std::memset(s, '0', sizeof(s) - 1); + s[sizeof(s) - 1] = '\0'; + + std::memcpy(buf, decimal_bytes, sizeof(buf)); + + for (size_t i = 0; i < 128; i++) { + int carry; + + carry = (buf[1] >= 0x7FFFFFFFFFFFFFFF); + buf[1] = ((buf[1] << 1) & 0xFFFFFFFFFFFFFFFF) + (buf[0] >= 0x7FFFFFFFFFFFFFFF); + buf[0] = ((buf[0] << 1) & 0xFFFFFFFFFFFFFFFF); + + for (int j = sizeof(s) - 2; j>= 0; j--) { + s[j] += s[j] - '0' + carry; + carry = (s[j] > '9'); + if (carry) { + s[j] -= 10; + } + } + } + + char* p = s; + while ((p[0] == '0') && (p < &s[sizeof(s) - 2])) { + p++; + } + + return std::string{p}; + } }; template diff --git a/c/validation/adbc_validation.cc b/c/validation/adbc_validation.cc index 324c837772..5980bda912 100644 --- a/c/validation/adbc_validation.cc +++ b/c/validation/adbc_validation.cc @@ -1557,7 +1557,7 @@ void StatementTest::TestSqlIngestDecimal128() { struct ArrowDecimal decimal6; ArrowDecimalInit(&decimal1, size, 38, 8); - ArrowDecimalSetInt(&decimal1, -12345600000); + ArrowDecimalSetInt(&decimal1, 12345600000); ArrowDecimalInit(&decimal2, size, 38, 8); ArrowDecimalSetInt(&decimal2, 1234); ArrowDecimalInit(&decimal3, size, 38, 8); @@ -1567,16 +1567,11 @@ void StatementTest::TestSqlIngestDecimal128() { ArrowDecimalInit(&decimal5, size, 38, 8); ArrowDecimalSetInt(&decimal5, 100000000000000); ArrowDecimalInit(&decimal6, size, 38, 8); - // 2342394230592.232349023094 in little endian + // 23423942305922323.49023094 in little endian uint8_t le_data[16] = { 0x76, 0xbb, 0xc8, 0x2c, 0x1c, 0x2b, 0x18, 0x72, 0x05, 0xf0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00}; ArrowDecimalSetBytes(&decimal6, le_data); - // - const uint64_t large_decimal[2] = {1, 2345678901234567890}; - uint8_t large_decimal_bytes[16]; - std::memcpy(&large_decimal_bytes, large_decimal, sizeof(large_decimal_bytes)); - ArrowDecimalSetBytes(&decimal6, large_decimal_bytes); const std::vector> values = { std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5, &decimal6}; @@ -1629,8 +1624,8 @@ void StatementTest::TestSqlIngestDecimal128() { //} const std::vector> str_values = { - std::nullopt, "-123.456", "0.00001234", "1", "123.456", "1000000", - "2342394230592.232349023094"}; + std::nullopt, "0.00001234", "1", "123.456", "123.456", "1000000", + "23423942305922323.49023094"}; ASSERT_NO_FATAL_FAILURE( CompareArray(reader.array_view->children[0], str_values)); From 94bf657fa3640dc733625dbade6b5e8caeb08338 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 28 Nov 2023 21:32:46 -0800 Subject: [PATCH 07/30] negative value support --- c/driver/postgresql/postgres_copy_reader.h | 7 +++++-- c/validation/adbc_validation.cc | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index 888335d0f7..7482af2df9 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -1240,6 +1240,11 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { ArrowDecimalGetBytes(&decimal, bytes_tmp); uint64_t tmp[2]; std::memcpy(tmp, bytes_tmp, sizeof(tmp)); + const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? kNumericPos : kNumericNeg; + if (sign == kNumericNeg) { + tmp[0] = ~tmp[0] + 1; + tmp[1] = ~tmp[1]; + } std::vector pg_digits; @@ -1290,8 +1295,6 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { } while (true); int16_t ndigits = pg_digits.size(); - const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? kNumericPos : kNumericNeg; - int32_t field_size_bytes = sizeof(ndigits) + sizeof(weight) + sizeof(sign) diff --git a/c/validation/adbc_validation.cc b/c/validation/adbc_validation.cc index 5980bda912..bcb2fab1d6 100644 --- a/c/validation/adbc_validation.cc +++ b/c/validation/adbc_validation.cc @@ -1557,7 +1557,7 @@ void StatementTest::TestSqlIngestDecimal128() { struct ArrowDecimal decimal6; ArrowDecimalInit(&decimal1, size, 38, 8); - ArrowDecimalSetInt(&decimal1, 12345600000); + ArrowDecimalSetInt(&decimal1, -12345600000); ArrowDecimalInit(&decimal2, size, 38, 8); ArrowDecimalSetInt(&decimal2, 1234); ArrowDecimalInit(&decimal3, size, 38, 8); @@ -1624,7 +1624,7 @@ void StatementTest::TestSqlIngestDecimal128() { //} const std::vector> str_values = { - std::nullopt, "0.00001234", "1", "123.456", "123.456", "1000000", + std::nullopt, "-123.456", "0.00001234", "1", "123.456", "1000000", "23423942305922323.49023094"}; ASSERT_NO_FATAL_FAILURE( CompareArray(reader.array_view->children[0], str_values)); From 4b499990d78298e63e012d888368d527e4a60d8d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 28 Nov 2023 21:47:49 -0800 Subject: [PATCH 08/30] skip other drivers --- c/driver/flightsql/sqlite_flightsql_test.cc | 3 +++ c/driver/sqlite/sqlite_test.cc | 4 ++++ c/driver_manager/adbc_driver_manager_test.cc | 3 +++ 3 files changed, 10 insertions(+) diff --git a/c/driver/flightsql/sqlite_flightsql_test.cc b/c/driver/flightsql/sqlite_flightsql_test.cc index a736d25398..640ae5080f 100644 --- a/c/driver/flightsql/sqlite_flightsql_test.cc +++ b/c/driver/flightsql/sqlite_flightsql_test.cc @@ -274,6 +274,9 @@ class SqliteFlightSqlStatementTest : public ::testing::Test, void TestSqlIngestInterval() { GTEST_SKIP() << "Cannot ingest Interval (not implemented)"; } + void TestSqlIngestDecimal128() { + GTEST_SKIP() << "Cannot ingest Decimal128 (not implemented)"; + } void TestSqlQueryRowsAffectedDelete() { GTEST_SKIP() << "Cannot query rows affected in delete (not implemented)"; } diff --git a/c/driver/sqlite/sqlite_test.cc b/c/driver/sqlite/sqlite_test.cc index 70a1ad6708..c8af54a169 100644 --- a/c/driver/sqlite/sqlite_test.cc +++ b/c/driver/sqlite/sqlite_test.cc @@ -326,6 +326,10 @@ class SqliteStatementTest : public ::testing::Test, GTEST_SKIP() << "Cannot ingest Interval (not implemented)"; } + void TestSqlIngestDecimal128() { + GTEST_SKIP() << "Cannot ingest Interval (not implemented)"; + } + protected: void ValidateIngestedTemporalData(struct ArrowArrayView* values, ArrowType type, enum ArrowTimeUnit unit, diff --git a/c/driver_manager/adbc_driver_manager_test.cc b/c/driver_manager/adbc_driver_manager_test.cc index 18e0a87dce..6fcda34cec 100644 --- a/c/driver_manager/adbc_driver_manager_test.cc +++ b/c/driver_manager/adbc_driver_manager_test.cc @@ -279,6 +279,9 @@ class SqliteStatementTest : public ::testing::Test, void TestSqlIngestInterval() { GTEST_SKIP() << "Cannot ingest Interval (not implemented)"; } + void TestSqlIngestDecimal128() { + GTEST_SKIP() << "Cannot ingest Decimal128 (not implemented)"; + } protected: SqliteQuirks quirks_; From c046632f0c9880cc4986e69db559c3d1454117cb Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 28 Nov 2023 21:57:30 -0800 Subject: [PATCH 09/30] No std::string_view --- c/driver/postgresql/postgres_copy_reader.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index 7482af2df9..436f44fd96 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -1254,15 +1254,14 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { bool truncating_trailing_zeros = true; const std::string decimal_string = DecimalBytesToString(tmp, 2); - const std::string_view vw = decimal_string; - int digits_remaining = vw.size(); + int digits_remaining = decimal_string.size(); do { const int start_pos = digits_remaining < kDecDigits ? 0 : digits_remaining - kDecDigits; // TODO: would be great to use a string_view here but wasn't sure // how to make that work with stoi const size_t len = digits_remaining < 4 ? digits_remaining : kDecDigits; - std::string substr{vw.substr(start_pos, len)}; + std::string substr{decimal_string.substr(start_pos, len)}; //size_t ndigits; TODO: maybe should use ndigits output int16_t val = static_cast(std::stoi(substr.data())); @@ -1289,7 +1288,7 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { } weight++; - if (start_pos <= static_cast(vw.size()) - scale) { + if (start_pos <= static_cast(decimal_string.size()) - scale) { seen_decimal = true; } } while (true); From 3957b6d7d6b7000344d7b187ff59a6cd86361f01 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 28 Nov 2023 22:06:51 -0800 Subject: [PATCH 10/30] cleanups --- c/cmake_modules/AdbcDefines.cmake | 7 ++++++- c/validation/adbc_validation.cc | 8 -------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/c/cmake_modules/AdbcDefines.cmake b/c/cmake_modules/AdbcDefines.cmake index 79224df196..5cc2eaa60b 100644 --- a/c/cmake_modules/AdbcDefines.cmake +++ b/c/cmake_modules/AdbcDefines.cmake @@ -75,7 +75,12 @@ if(MSVC) elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set(ADBC_C_CXX_FLAGS_CHECKIN -Wall -Wextra -Werror -Wno-unused-parameter) + set(ADBC_C_CXX_FLAGS_CHECKIN + -Wall + -Wextra + -Wpedantic + -Werror + -Wno-unused-parameter) set(ADBC_C_CXX_FLAGS_PRODUCTION -Wall) else() message(WARNING "Unknown compiler: ${CMAKE_CXX_COMPILER_ID}") diff --git a/c/validation/adbc_validation.cc b/c/validation/adbc_validation.cc index bcb2fab1d6..1137da41bc 100644 --- a/c/validation/adbc_validation.cc +++ b/c/validation/adbc_validation.cc @@ -1615,14 +1615,6 @@ void StatementTest::TestSqlIngestDecimal128() { ASSERT_EQ(values.size(), reader.array->length); ASSERT_EQ(1, reader.array->n_children); - // Currently postgres roundtrips to string, but in the future we should - // roundtrip to a decimal - // - //if (round_trip_type == type) { - // ASSERT_NO_FATAL_FAILURE( - // CompareArray(reader.array_view->children[0], values)); - //} - const std::vector> str_values = { std::nullopt, "-123.456", "0.00001234", "1", "123.456", "1000000", "23423942305922323.49023094"}; From c5d19bb9444705d0347ff4546806c1977de89c34 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 29 Nov 2023 14:45:17 -0800 Subject: [PATCH 11/30] more generic ToString --- c/driver/postgresql/postgres_copy_reader.h | 55 ++++++++++++---------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index 436f44fd96..f665c6f21e 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -1227,33 +1227,22 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { // TODO: these need to be inferred from the schema not hard coded constexpr int16_t precision = 38; constexpr int16_t scale = 8; - ArrowDecimalInit(&decimal, 128, precision, scale); + constexpr int32_t decimal_width = 128; + + ArrowDecimalInit(&decimal, decimal_width, precision, scale); ArrowArrayViewGetDecimalUnsafe(array_view_, index, &decimal); - constexpr uint16_t kNumericPos = 0x0000; - constexpr uint16_t kNumericNeg = 0x4000; // Number of decimal digits per Postgres digit constexpr int kDecDigits = 4; - // This assumes that we are dealing with Decimal128 - // more work required for 256 support - uint8_t bytes_tmp[32]; - ArrowDecimalGetBytes(&decimal, bytes_tmp); - uint64_t tmp[2]; - std::memcpy(tmp, bytes_tmp, sizeof(tmp)); const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? kNumericPos : kNumericNeg; - if (sign == kNumericNeg) { - tmp[0] = ~tmp[0] + 1; - tmp[1] = ~tmp[1]; - } std::vector pg_digits; - int16_t weight = -(scale / kDecDigits); int16_t dscale = scale; bool seen_decimal = scale == 0; bool truncating_trailing_zeros = true; - const std::string decimal_string = DecimalBytesToString(tmp, 2); + const std::string decimal_string = DecimalToString(&decimal); int digits_remaining = decimal_string.size(); do { const int start_pos = digits_remaining < kDecDigits ? @@ -1314,22 +1303,38 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { } private: - std::string DecimalBytesToString(const uint64_t* decimal_bytes, size_t size) { - // Basic approach adopted from https://stackoverflow.com/a/8023862/621736 - // This currently only works with decimal128 - char s[38]; - uint64_t buf[2]; + static constexpr uint16_t kNumericPos = 0x0000; + static constexpr uint16_t kNumericNeg = 0x4000; + + template + std::string DecimalToString(struct ArrowDecimal* decimal) { + constexpr size_t nwords = (DEC_WIDTH == 128) ? 2 : 4; + uint8_t tmp[DEC_WIDTH / 8]; + ArrowDecimalGetBytes(decimal, tmp); + uint64_t buf[DEC_WIDTH / 64]; + std::memcpy(buf, tmp, sizeof(buf)); + const int16_t sign = ArrowDecimalSign(decimal) > 0 ? kNumericPos : kNumericNeg; + const bool is_negative = sign == kNumericNeg ? true : false; + if (is_negative) { + buf[0] = ~buf[0] + 1; + for (size_t i = 1; i < nwords; i++) { + buf[i] = ~buf[i]; + } + } + constexpr size_t max_decimal_digits = (DEC_WIDTH == 128) ? 39 : 78; + // Basic approach adopted from https://stackoverflow.com/a/8023862/621736 + char s[max_decimal_digits + 1]; std::memset(s, '0', sizeof(s) - 1); s[sizeof(s) - 1] = '\0'; - std::memcpy(buf, decimal_bytes, sizeof(buf)); - - for (size_t i = 0; i < 128; i++) { + for (size_t i = 0; i < DEC_WIDTH; i++) { int carry; - carry = (buf[1] >= 0x7FFFFFFFFFFFFFFF); - buf[1] = ((buf[1] << 1) & 0xFFFFFFFFFFFFFFFF) + (buf[0] >= 0x7FFFFFFFFFFFFFFF); + carry = (buf[nwords - 1] >= 0x7FFFFFFFFFFFFFFF); + for (size_t j = nwords - 1; j > 0; j--) { + buf[j] = ((buf[j] << 1) & 0xFFFFFFFFFFFFFFFF) + (buf[j-1] >= 0x7FFFFFFFFFFFFFFF); + } buf[0] = ((buf[0] << 1) & 0xFFFFFFFFFFFFFFFF); for (int j = sizeof(s) - 2; j>= 0; j--) { From ba44774af39a6a3af0b8dee7639a7fcd982a270b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 29 Nov 2023 15:35:48 -0800 Subject: [PATCH 12/30] don't hardcode precision and scale --- c/driver/postgresql/postgres_copy_reader.h | 46 +++++++++++----------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index f665c6f21e..e965851514 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -1217,41 +1217,36 @@ class PostgresCopyIntervalFieldWriter : public PostgresCopyFieldWriter { } }; - // Inspiration for this taken from get_str_from_var in the pg source // src/backend/utils/adt/numeric.c +template class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { public: + PostgresCopyNumericFieldWriter(int32_t precision, int32_t scale) : + precision_{precision}, scale_{scale} {} + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { struct ArrowDecimal decimal; - // TODO: these need to be inferred from the schema not hard coded - constexpr int16_t precision = 38; - constexpr int16_t scale = 8; - constexpr int32_t decimal_width = 128; - - ArrowDecimalInit(&decimal, decimal_width, precision, scale); + ArrowDecimalInit(&decimal, bitwidth_, precision_, scale_); ArrowArrayViewGetDecimalUnsafe(array_view_, index, &decimal); - // Number of decimal digits per Postgres digit - constexpr int kDecDigits = 4; const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? kNumericPos : kNumericNeg; + // Number of decimal digits per Postgres digit + constexpr int kDecDigits = 4; std::vector pg_digits; - int16_t weight = -(scale / kDecDigits); - int16_t dscale = scale; - bool seen_decimal = scale == 0; + int16_t weight = -(scale_ / kDecDigits); + int16_t dscale = scale_; + bool seen_decimal = scale_ == 0; bool truncating_trailing_zeros = true; - const std::string decimal_string = DecimalToString(&decimal); + const std::string decimal_string = DecimalToString(&decimal); int digits_remaining = decimal_string.size(); do { const int start_pos = digits_remaining < kDecDigits ? 0 : digits_remaining - kDecDigits; - // TODO: would be great to use a string_view here but wasn't sure - // how to make that work with stoi const size_t len = digits_remaining < 4 ? digits_remaining : kDecDigits; std::string substr{decimal_string.substr(start_pos, len)}; - //size_t ndigits; TODO: maybe should use ndigits output int16_t val = static_cast(std::stoi(substr.data())); if (val == 0) { @@ -1277,7 +1272,7 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { } weight++; - if (start_pos <= static_cast(decimal_string.size()) - scale) { + if (start_pos <= static_cast(decimal_string.size()) - scale_) { seen_decimal = true; } } while (true); @@ -1303,9 +1298,6 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { } private: - static constexpr uint16_t kNumericPos = 0x0000; - static constexpr uint16_t kNumericNeg = 0x4000; - template std::string DecimalToString(struct ArrowDecimal* decimal) { constexpr size_t nwords = (DEC_WIDTH == 128) ? 2 : 4; @@ -1353,6 +1345,12 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { return std::string{p}; } + + static constexpr uint16_t kNumericPos = 0x0000; + static constexpr uint16_t kNumericNeg = 0x4000; + static constexpr int32_t bitwidth_ = (T == NANOARROW_TYPE_DECIMAL128) ? 128 : 256; + const int32_t precision_; + const int32_t scale_; }; template @@ -1517,9 +1515,13 @@ static inline ArrowErrorCode MakeCopyFieldWriter(struct ArrowSchema* schema, case NANOARROW_TYPE_DOUBLE: *out = new PostgresCopyDoubleFieldWriter(); return NANOARROW_OK; - case NANOARROW_TYPE_DECIMAL128: - *out = new PostgresCopyNumericFieldWriter(); + case NANOARROW_TYPE_DECIMAL128: { + const auto precision = schema_view.decimal_precision; + const auto scale = schema_view.decimal_scale; + *out = new PostgresCopyNumericFieldWriter< + NANOARROW_TYPE_DECIMAL128>(precision, scale); return NANOARROW_OK; + } case NANOARROW_TYPE_BINARY: case NANOARROW_TYPE_STRING: case NANOARROW_TYPE_LARGE_STRING: From 06b6349f080d3815aa4b0c60a3381988ccacb681 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 29 Nov 2023 16:03:56 -0800 Subject: [PATCH 13/30] Decimal256 Support --- c/driver/flightsql/sqlite_flightsql_test.cc | 3 + c/driver/postgresql/postgres_copy_reader.h | 7 ++ c/driver/postgresql/postgresql_test.cc | 1 + c/driver/postgresql/statement.cc | 2 + c/driver/sqlite/sqlite_test.cc | 6 +- c/driver_manager/adbc_driver_manager_test.cc | 3 + c/validation/adbc_validation.cc | 94 ++++++++++++++++++++ c/validation/adbc_validation.h | 2 + c/validation/adbc_validation_util.cc | 11 ++- 9 files changed, 125 insertions(+), 4 deletions(-) diff --git a/c/driver/flightsql/sqlite_flightsql_test.cc b/c/driver/flightsql/sqlite_flightsql_test.cc index 640ae5080f..08559b7eb4 100644 --- a/c/driver/flightsql/sqlite_flightsql_test.cc +++ b/c/driver/flightsql/sqlite_flightsql_test.cc @@ -277,6 +277,9 @@ class SqliteFlightSqlStatementTest : public ::testing::Test, void TestSqlIngestDecimal128() { GTEST_SKIP() << "Cannot ingest Decimal128 (not implemented)"; } + void TestSqlIngestDecimal256() { + GTEST_SKIP() << "Cannot ingest Decimal256 (not implemented)"; + } void TestSqlQueryRowsAffectedDelete() { GTEST_SKIP() << "Cannot query rows affected in delete (not implemented)"; } diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index e965851514..a72ac5d923 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -1522,6 +1522,13 @@ static inline ArrowErrorCode MakeCopyFieldWriter(struct ArrowSchema* schema, NANOARROW_TYPE_DECIMAL128>(precision, scale); return NANOARROW_OK; } + case NANOARROW_TYPE_DECIMAL256: { + const auto precision = schema_view.decimal_precision; + const auto scale = schema_view.decimal_scale; + *out = new PostgresCopyNumericFieldWriter< + NANOARROW_TYPE_DECIMAL256>(precision, scale); + return NANOARROW_OK; + } case NANOARROW_TYPE_BINARY: case NANOARROW_TYPE_STRING: case NANOARROW_TYPE_LARGE_STRING: diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc index 70e8a153cf..b0a335d6fd 100644 --- a/c/driver/postgresql/postgresql_test.cc +++ b/c/driver/postgresql/postgresql_test.cc @@ -96,6 +96,7 @@ class PostgresQuirks : public adbc_validation::DriverQuirks { case NANOARROW_TYPE_LARGE_STRING: return NANOARROW_TYPE_STRING; case NANOARROW_TYPE_DECIMAL128: + case NANOARROW_TYPE_DECIMAL256: return NANOARROW_TYPE_STRING; default: return ingest_type; diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index 5f2217aa62..5206d8dc58 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -211,6 +211,7 @@ struct BindStream { param_lengths[i] = 16; break; case ArrowType::NANOARROW_TYPE_DECIMAL128: + case ArrowType::NANOARROW_TYPE_DECIMAL256: type_id = PostgresTypeId::kNumeric; param_lengths[i] = 0; break; @@ -1061,6 +1062,7 @@ AdbcStatusCode PostgresStatement::CreateBulkTable( create += " INTERVAL"; break; case ArrowType::NANOARROW_TYPE_DECIMAL128: + case ArrowType::NANOARROW_TYPE_DECIMAL256: create += " DECIMAL"; break; case ArrowType::NANOARROW_TYPE_DICTIONARY: { diff --git a/c/driver/sqlite/sqlite_test.cc b/c/driver/sqlite/sqlite_test.cc index c8af54a169..aa3a11fbc5 100644 --- a/c/driver/sqlite/sqlite_test.cc +++ b/c/driver/sqlite/sqlite_test.cc @@ -325,9 +325,11 @@ class SqliteStatementTest : public ::testing::Test, void TestSqlIngestInterval() { GTEST_SKIP() << "Cannot ingest Interval (not implemented)"; } - void TestSqlIngestDecimal128() { - GTEST_SKIP() << "Cannot ingest Interval (not implemented)"; + GTEST_SKIP() << "Cannot ingest Decimal128 (not implemented)"; + } + void TestSqlIngestDecimal256() { + GTEST_SKIP() << "Cannot ingest Decimal256 (not implemented)"; } protected: diff --git a/c/driver_manager/adbc_driver_manager_test.cc b/c/driver_manager/adbc_driver_manager_test.cc index 6fcda34cec..7e6bbc4302 100644 --- a/c/driver_manager/adbc_driver_manager_test.cc +++ b/c/driver_manager/adbc_driver_manager_test.cc @@ -282,6 +282,9 @@ class SqliteStatementTest : public ::testing::Test, void TestSqlIngestDecimal128() { GTEST_SKIP() << "Cannot ingest Decimal128 (not implemented)"; } + void TestSqlIngestDecimal256() { + GTEST_SKIP() << "Cannot ingest Decimal256 (not implemented)"; + } protected: SqliteQuirks quirks_; diff --git a/c/validation/adbc_validation.cc b/c/validation/adbc_validation.cc index 1137da41bc..c261f3c433 100644 --- a/c/validation/adbc_validation.cc +++ b/c/validation/adbc_validation.cc @@ -1627,6 +1627,100 @@ void StatementTest::TestSqlIngestDecimal128() { ASSERT_THAT(AdbcStatementRelease(&statement, &error), IsOkStatus(&error)); } +void StatementTest::TestSqlIngestDecimal256() { + if (!quirks()->supports_bulk_ingest(ADBC_INGEST_OPTION_MODE_CREATE)) { + GTEST_SKIP(); + } + + ASSERT_THAT(quirks()->DropTable(&connection, "bulk_ingest", &error), + IsOkStatus(&error)); + + Handle schema; + Handle array; + struct ArrowError na_error; + constexpr int32_t bitwidth = 256; + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL256; + + struct ArrowDecimal decimal1; + struct ArrowDecimal decimal2; + struct ArrowDecimal decimal3; + struct ArrowDecimal decimal4; + struct ArrowDecimal decimal5; + struct ArrowDecimal decimal6; + + ArrowDecimalInit(&decimal1, bitwidth, 38, 8); + ArrowDecimalSetInt(&decimal1, -12345600000); + ArrowDecimalInit(&decimal2, bitwidth, 38, 8); + ArrowDecimalSetInt(&decimal2, 1234); + ArrowDecimalInit(&decimal3, bitwidth, 38, 8); + ArrowDecimalSetInt(&decimal3, 100000000); + ArrowDecimalInit(&decimal4, bitwidth, 38, 8); + ArrowDecimalSetInt(&decimal4, 12345600000); + ArrowDecimalInit(&decimal5, bitwidth, 38, 8); + ArrowDecimalSetInt(&decimal5, 100000000000000); + ArrowDecimalInit(&decimal6, bitwidth, 38, 8); + + uint64_t le_data64[4] = {17877984925544397504ULL, 5352188884907840935ULL, + 234631617561833724ULL, 196678011949953713ULL}; + uint8_t le_data[32]; + memcpy(le_data, le_data64, sizeof(le_data64)); + ArrowDecimalSetBytes(&decimal6, le_data); + + const std::vector> values = { + std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5, &decimal6}; + + ASSERT_THAT(MakeSchema(&schema.value, {{"col", type}}), IsOkErrno()); + ASSERT_THAT(MakeBatch(&schema.value, &array.value, + &na_error, values), IsOkErrno()); + + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementSetOption(&statement, ADBC_INGEST_OPTION_TARGET_TABLE, + "bulk_ingest", &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementBind(&statement, &array.value, &schema.value, &error), + IsOkStatus(&error)); + + int64_t rows_affected = 0; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, &rows_affected, &error), + IsOkStatus(&error)); + ASSERT_THAT(rows_affected, + ::testing::AnyOf(::testing::Eq(values.size()), ::testing::Eq(-1))); + + ASSERT_THAT(AdbcStatementSetSqlQuery( + &statement, + "SELECT * FROM bulk_ingest ORDER BY \"col\" ASC NULLS FIRST", &error), + IsOkStatus(&error)); + { + StreamReader reader; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, + &reader.rows_affected, &error), + IsOkStatus(&error)); + ASSERT_THAT(reader.rows_affected, + ::testing::AnyOf(::testing::Eq(values.size()), ::testing::Eq(-1))); + + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ArrowType round_trip_type = quirks()->IngestSelectRoundTripType(type); + ASSERT_NO_FATAL_FAILURE( + CompareSchema(&reader.schema.value, {{"col", round_trip_type, NULLABLE}})); + + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NE(nullptr, reader.array->release); + ASSERT_EQ(values.size(), reader.array->length); + ASSERT_EQ(1, reader.array->n_children); + + const std::vector> str_values = { + std::nullopt, "-123.456", "0.00001234", "1", "123.456", "1000000", + "12345678901234567890123456789012345678901234567890123456789012345678.90123456"}; + ASSERT_NO_FATAL_FAILURE( + CompareArray(reader.array_view->children[0], str_values)); + + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_EQ(nullptr, reader.array->release); + } + ASSERT_THAT(AdbcStatementRelease(&statement, &error), IsOkStatus(&error)); +} + + void StatementTest::TestSqlIngestString() { ASSERT_NO_FATAL_FAILURE(TestSqlIngestType( NANOARROW_TYPE_STRING, {std::nullopt, "", "", "1234", "例"}, false)); diff --git a/c/validation/adbc_validation.h b/c/validation/adbc_validation.h index 44b9e98316..82b0eb6131 100644 --- a/c/validation/adbc_validation.h +++ b/c/validation/adbc_validation.h @@ -330,6 +330,7 @@ class StatementTest { // Decmial void TestSqlIngestDecimal128(); + void TestSqlIngestDecimal256(); // Strings void TestSqlIngestString(); @@ -438,6 +439,7 @@ class StatementTest { TEST_F(FIXTURE, SqlIngestFloat32) { TestSqlIngestFloat32(); } \ TEST_F(FIXTURE, SqlIngestFloat64) { TestSqlIngestFloat64(); } \ TEST_F(FIXTURE, SqlIngestDecimal128) { TestSqlIngestDecimal128(); } \ + TEST_F(FIXTURE, SqlIngestDecimal256) { TestSqlIngestDecimal256(); } \ TEST_F(FIXTURE, SqlIngestString) { TestSqlIngestString(); } \ TEST_F(FIXTURE, SqlIngestLargeString) { TestSqlIngestLargeString(); } \ TEST_F(FIXTURE, SqlIngestBinary) { TestSqlIngestBinary(); } \ diff --git a/c/validation/adbc_validation_util.cc b/c/validation/adbc_validation_util.cc index b72bcb69c6..ade8cf0995 100644 --- a/c/validation/adbc_validation_util.cc +++ b/c/validation/adbc_validation_util.cc @@ -143,10 +143,17 @@ int MakeSchema(struct ArrowSchema* schema, const std::vector& field for (const SchemaField& field : fields) { switch (field.type) { case NANOARROW_TYPE_DECIMAL128: - // TODO: don't hardcore 19, 8 + // TODO: how can we avoid hard-coding precision and scale? CHECK_ERRNO(AdbcNsArrowSchemaSetTypeDecimal(schema->children[i], field.type, - 19, + 38, + 8)); + break; + case NANOARROW_TYPE_DECIMAL256: + // TODO: how can we avoid hard-coding precision and scale? + CHECK_ERRNO(AdbcNsArrowSchemaSetTypeDecimal(schema->children[i], + field.type, + 76, 8)); break; default: From bc19709dfd062b3fc5c6df6470697f34984d866e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 29 Nov 2023 16:08:50 -0800 Subject: [PATCH 14/30] remove dead code --- c/validation/adbc_validation_util.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/c/validation/adbc_validation_util.h b/c/validation/adbc_validation_util.h index ef048e039c..321b10f9d8 100644 --- a/c/validation/adbc_validation_util.h +++ b/c/validation/adbc_validation_util.h @@ -416,17 +416,6 @@ void CompareArray(struct ArrowArrayView* array, ASSERT_EQ(interval.months, (*v)->months); ASSERT_EQ(interval.days, (*v)->days); ASSERT_EQ(interval.ns, (*v)->ns); - } else if constexpr (std::is_same::value) { - ASSERT_NE(array->buffer_views[1].data.data, nullptr); - struct ArrowDecimal decimal; - // For now assuming Decimal128 so set as bitwidth - ArrowDecimalInit(&decimal, 128, (*v)->precision, (*v)->scale); - ArrowArrayViewGetDecimalUnsafe(array, i, &decimal); - - ASSERT_EQ(decimal.n_words, (*v)->n_words); - // For now assuming Decimal128 so only need to check first two words - ASSERT_EQ(decimal.words[0], (*v)->words[0]); - ASSERT_EQ(decimal.words[1], (*v)->words[1]); } else { static_assert(!sizeof(T), "Not yet implemented"); } From e9967a79039a4f8f3ca9623c2517855d2f4b76bf Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 14 Dec 2023 16:47:20 -0500 Subject: [PATCH 15/30] less string --- c/driver/postgresql/postgres_copy_reader.h | 24 ++++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index a72ac5d923..2bc22283e3 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -1240,14 +1240,17 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { bool seen_decimal = scale_ == 0; bool truncating_trailing_zeros = true; - const std::string decimal_string = DecimalToString(&decimal); - int digits_remaining = decimal_string.size(); + char decimal_string[max_decimal_digits_ + 1]; + DecimalToString(&decimal, decimal_string); + int digits_remaining = std::strlen(decimal_string); do { const int start_pos = digits_remaining < kDecDigits ? 0 : digits_remaining - kDecDigits; const size_t len = digits_remaining < 4 ? digits_remaining : kDecDigits; - std::string substr{decimal_string.substr(start_pos, len)}; - int16_t val = static_cast(std::stoi(substr.data())); + char substr[kDecDigits + 1]; + std::memcpy(substr, decimal_string + start_pos, len); + substr[len] = '\0'; + int16_t val = static_cast(std::atoi(substr)); if (val == 0) { if (!seen_decimal && truncating_trailing_zeros) { @@ -1272,7 +1275,7 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { } weight++; - if (start_pos <= static_cast(decimal_string.size()) - scale_) { + if (start_pos <= static_cast(std::strlen(decimal_string)) - scale_) { seen_decimal = true; } } while (true); @@ -1298,8 +1301,9 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { } private: + // TODO: maybe we should return strlen here template - std::string DecimalToString(struct ArrowDecimal* decimal) { + void DecimalToString(struct ArrowDecimal* decimal, char* out) { constexpr size_t nwords = (DEC_WIDTH == 128) ? 2 : 4; uint8_t tmp[DEC_WIDTH / 8]; ArrowDecimalGetBytes(decimal, tmp); @@ -1314,9 +1318,8 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { } } - constexpr size_t max_decimal_digits = (DEC_WIDTH == 128) ? 39 : 78; // Basic approach adopted from https://stackoverflow.com/a/8023862/621736 - char s[max_decimal_digits + 1]; + char s[max_decimal_digits_ + 1]; std::memset(s, '0', sizeof(s) - 1); s[sizeof(s) - 1] = '\0'; @@ -1343,12 +1346,15 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { p++; } - return std::string{p}; + const size_t ndigits = sizeof(s) - (p - s); + std::memcpy(out, p, ndigits); + out[ndigits] = '\0'; } static constexpr uint16_t kNumericPos = 0x0000; static constexpr uint16_t kNumericNeg = 0x4000; static constexpr int32_t bitwidth_ = (T == NANOARROW_TYPE_DECIMAL128) ? 128 : 256; + static constexpr size_t max_decimal_digits_ = (T == NANOARROW_TYPE_DECIMAL128) ? 39 : 78; const int32_t precision_; const int32_t scale_; }; From 6a0d3c99fad88115ce8a1d94f74d868988c530e9 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 16 Dec 2023 17:14:16 -0500 Subject: [PATCH 16/30] Allocate up front --- c/driver/postgresql/postgres_copy_reader.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index 2bc22283e3..ecfdde4ba1 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -1293,8 +1293,10 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, sign, error)); NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, dscale, error)); + const size_t pg_digit_bytes = sizeof(int16_t) * pg_digits.size(); + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, pg_digit_bytes)); for (auto pg_digit : pg_digits) { - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, pg_digit, error)); + WriteUnsafe(buffer, pg_digit); } return ADBC_STATUS_OK; From df7ba3e4999f3120362b24f66b1f756340d23baf Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 18 Dec 2023 14:27:02 -0500 Subject: [PATCH 17/30] compiling with lifecycle issues --- c/driver/flightsql/sqlite_flightsql_test.cc | 6 - c/driver/postgresql/postgresql_test.cc | 145 ++++++++++++++++++- c/driver/sqlite/sqlite_test.cc | 6 - c/driver_manager/adbc_driver_manager_test.cc | 6 - 4 files changed, 144 insertions(+), 19 deletions(-) diff --git a/c/driver/flightsql/sqlite_flightsql_test.cc b/c/driver/flightsql/sqlite_flightsql_test.cc index 08559b7eb4..a736d25398 100644 --- a/c/driver/flightsql/sqlite_flightsql_test.cc +++ b/c/driver/flightsql/sqlite_flightsql_test.cc @@ -274,12 +274,6 @@ class SqliteFlightSqlStatementTest : public ::testing::Test, void TestSqlIngestInterval() { GTEST_SKIP() << "Cannot ingest Interval (not implemented)"; } - void TestSqlIngestDecimal128() { - GTEST_SKIP() << "Cannot ingest Decimal128 (not implemented)"; - } - void TestSqlIngestDecimal256() { - GTEST_SKIP() << "Cannot ingest Decimal256 (not implemented)"; - } void TestSqlQueryRowsAffectedDelete() { GTEST_SKIP() << "Cannot query rows affected in delete (not implemented)"; } diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc index b0a335d6fd..9986dfd7b9 100644 --- a/c/driver/postgresql/postgresql_test.cc +++ b/c/driver/postgresql/postgresql_test.cc @@ -1425,7 +1425,6 @@ TEST_P(PostgresTypeTest, SelectValue) { ASSERT_NE(nullptr, reader.array->release); ASSERT_FALSE(ArrowArrayViewIsNull(&reader.array_view.value, 0)); ASSERT_FALSE(ArrowArrayViewIsNull(reader.array_view->children[0], 0)); - // check value ASSERT_NO_FATAL_FAILURE(std::visit( [&](auto&& arg) -> void { @@ -1630,3 +1629,147 @@ INSTANTIATE_TEST_SUITE_P(TimeTypes, PostgresTypeTest, testing::ValuesIn(kTimeTyp INSTANTIATE_TEST_SUITE_P(TimestampTypes, PostgresTypeTest, testing::ValuesIn(kTimestampTypeCases), TypeTestCase::FormatName); + +struct DecimalTestCase { + const enum ArrowType type; + const int32_t precision; + const int32_t scale; + const std::vector> data; + const std::vector> expected; +}; + +class PostgresDecimalTest : public ::testing::TestWithParam { +public: + void SetUp() override { + ASSERT_THAT(AdbcDatabaseNew(&database_, &error_), IsOkStatus(&error_)); + ASSERT_THAT(quirks_.SetupDatabase(&database_, &error_), IsOkStatus(&error_)); + ASSERT_THAT(AdbcDatabaseInit(&database_, &error_), IsOkStatus(&error_)); + + ASSERT_THAT(AdbcConnectionNew(&connection_, &error_), IsOkStatus(&error_)); + ASSERT_THAT(AdbcConnectionInit(&connection_, &database_, &error_), + IsOkStatus(&error_)); + + ASSERT_THAT(AdbcStatementNew(&connection_, &statement_, &error_), + IsOkStatus(&error_)); + + ASSERT_THAT(quirks_.DropTable(&connection_, "bulk_ingest", &error_), IsOkStatus(&error_)); + } + + void TearDown() override { + if (statement_.private_data) { + ASSERT_THAT(AdbcStatementRelease(&statement_, &error_), IsOkStatus(&error_)); + } + if (connection_.private_data) { + ASSERT_THAT(AdbcConnectionRelease(&connection_, &error_), IsOkStatus(&error_)); + } + if (database_.private_data) { + ASSERT_THAT(AdbcDatabaseRelease(&database_, &error_), IsOkStatus(&error_)); + } + + if (error_.release) error_.release(&error_); + } + +protected: + PostgresQuirks quirks_; + struct AdbcError error_ = {}; + struct AdbcDatabase database_ = {}; + struct AdbcConnection connection_ = {}; + struct AdbcStatement statement_ = {}; +}; + +TEST_P(PostgresDecimalTest, SelectValue) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + + const enum ArrowType type = GetParam().type; + const int32_t precision = GetParam().precision; + const int32_t scale = GetParam().scale; + const auto data = GetParam().data; + const auto expected = GetParam().expected; + const size_t nrecords = expected.size(); + + int32_t bitwidth; + switch (type) { + case NANOARROW_TYPE_DECIMAL128: + bitwidth = 128; + break; + case NANOARROW_TYPE_DECIMAL256: + bitwidth = 256; + break; + default: + ASSERT_TRUE(false) << " unsupported type for decimal parametrization"; + } + + std::vector> values; + //values.push_back(std::nullopt); + for (size_t i = 0; i < nrecords; i++) { + const auto record = data[i]; + struct ArrowDecimal decimal; + ArrowDecimalInit(&decimal, bitwidth, precision, scale); + ArrowDecimalSetBytes(&decimal, record.data()); + values.push_back(&decimal); + } + + ASSERT_THAT(adbc_validation::MakeSchema(&schema.value, {{"col", type}}), adbc_validation::IsOkErrno()); + ASSERT_THAT(adbc_validation::MakeBatch(&schema.value, &array.value, + &na_error, values), adbc_validation::IsOkErrno()); + + ASSERT_THAT(AdbcStatementSetOption(&statement_, + ADBC_INGEST_OPTION_TARGET_TABLE, + "bulk_ingest", &error_), + IsOkStatus(&error_)); + ASSERT_THAT(AdbcStatementBind(&statement_, &array.value, &schema.value, &error_), + IsOkStatus(&error_)); + + int64_t rows_affected = 0; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement_, nullptr, &rows_affected, &error_), + IsOkStatus(&error_)); + ASSERT_THAT(rows_affected, + ::testing::AnyOf(::testing::Eq(values.size()), ::testing::Eq(-1))); + + ASSERT_THAT(AdbcStatementSetSqlQuery( + &statement_, + "SELECT * FROM bulk_ingest ORDER BY \"col\" ASC NULLS FIRST", &error_), + IsOkStatus(&error_)); + + { + adbc_validation::StreamReader reader; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement_, &reader.stream.value, + &reader.rows_affected, &error_), + IsOkStatus(&error_)); + ASSERT_THAT(reader.rows_affected, + ::testing::AnyOf(::testing::Eq(values.size()), ::testing::Eq(-1))); + + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ArrowType round_trip_type = quirks_.IngestSelectRoundTripType(type); + ASSERT_NO_FATAL_FAILURE( + adbc_validation::CompareSchema(&reader.schema.value, {{"col", round_trip_type, true}})); + + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NE(nullptr, reader.array->release); + ASSERT_EQ(values.size(), reader.array->length); + ASSERT_EQ(1, reader.array->n_children); + + ASSERT_NO_FATAL_FAILURE( + adbc_validation::CompareArray(reader.array_view->children[0], expected)); + + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_EQ(nullptr, reader.array->release); + } +} + +static std::initializer_list kDecimal128Cases = { + { + NANOARROW_TYPE_DECIMAL128, 38, 8, { + { + 0x0, 0x18, 0x25, 0x20, 0xfd, 0xff, 0xff, 0xff, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + }, + }, + {"-123.456"} + }}; + +INSTANTIATE_TEST_SUITE_P(Decimal128Tests, PostgresDecimalTest, testing::ValuesIn(kDecimal128Cases)); diff --git a/c/driver/sqlite/sqlite_test.cc b/c/driver/sqlite/sqlite_test.cc index aa3a11fbc5..70a1ad6708 100644 --- a/c/driver/sqlite/sqlite_test.cc +++ b/c/driver/sqlite/sqlite_test.cc @@ -325,12 +325,6 @@ class SqliteStatementTest : public ::testing::Test, void TestSqlIngestInterval() { GTEST_SKIP() << "Cannot ingest Interval (not implemented)"; } - void TestSqlIngestDecimal128() { - GTEST_SKIP() << "Cannot ingest Decimal128 (not implemented)"; - } - void TestSqlIngestDecimal256() { - GTEST_SKIP() << "Cannot ingest Decimal256 (not implemented)"; - } protected: void ValidateIngestedTemporalData(struct ArrowArrayView* values, ArrowType type, diff --git a/c/driver_manager/adbc_driver_manager_test.cc b/c/driver_manager/adbc_driver_manager_test.cc index 7e6bbc4302..18e0a87dce 100644 --- a/c/driver_manager/adbc_driver_manager_test.cc +++ b/c/driver_manager/adbc_driver_manager_test.cc @@ -279,12 +279,6 @@ class SqliteStatementTest : public ::testing::Test, void TestSqlIngestInterval() { GTEST_SKIP() << "Cannot ingest Interval (not implemented)"; } - void TestSqlIngestDecimal128() { - GTEST_SKIP() << "Cannot ingest Decimal128 (not implemented)"; - } - void TestSqlIngestDecimal256() { - GTEST_SKIP() << "Cannot ingest Decimal256 (not implemented)"; - } protected: SqliteQuirks quirks_; From ac733bf92f2c9e401cf433398534d7d11032d554 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 18 Dec 2023 15:46:32 -0500 Subject: [PATCH 18/30] lifecycle workarounds --- c/driver/postgresql/postgresql_test.cc | 53 ++++++++++++++++---------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc index 9986dfd7b9..38aba5d54b 100644 --- a/c/driver/postgresql/postgresql_test.cc +++ b/c/driver/postgresql/postgresql_test.cc @@ -1632,7 +1632,7 @@ INSTANTIATE_TEST_SUITE_P(TimestampTypes, PostgresTypeTest, struct DecimalTestCase { const enum ArrowType type; - const int32_t precision; + const int32_t precision; const int32_t scale; const std::vector> data; const std::vector> expected; @@ -1652,7 +1652,8 @@ class PostgresDecimalTest : public ::testing::TestWithParam { ASSERT_THAT(AdbcStatementNew(&connection_, &statement_, &error_), IsOkStatus(&error_)); - ASSERT_THAT(quirks_.DropTable(&connection_, "bulk_ingest", &error_), IsOkStatus(&error_)); + ASSERT_THAT(quirks_.DropTable(&connection_, "bulk_ingest", &error_), + IsOkStatus(&error_)); } void TearDown() override { @@ -1674,7 +1675,7 @@ class PostgresDecimalTest : public ::testing::TestWithParam { struct AdbcError error_ = {}; struct AdbcDatabase database_ = {}; struct AdbcConnection connection_ = {}; - struct AdbcStatement statement_ = {}; + struct AdbcStatement statement_ = {}; }; TEST_P(PostgresDecimalTest, SelectValue) { @@ -1688,7 +1689,7 @@ TEST_P(PostgresDecimalTest, SelectValue) { const auto data = GetParam().data; const auto expected = GetParam().expected; const size_t nrecords = expected.size(); - + int32_t bitwidth; switch (type) { case NANOARROW_TYPE_DECIMAL128: @@ -1701,27 +1702,37 @@ TEST_P(PostgresDecimalTest, SelectValue) { ASSERT_TRUE(false) << " unsupported type for decimal parametrization"; } + // this is a bit of a hack to make std::vector play nicely with + // a dynamic number of stack-allocated ArrowDecimal objects + constexpr size_t max_decimals = 10; + struct ArrowDecimal decimals[max_decimals]; + if (nrecords > max_decimals) { + ASSERT_TRUE(false) << + " max_decimals exceeded for test case - please change parametrization"; + } + std::vector> values; //values.push_back(std::nullopt); for (size_t i = 0; i < nrecords; i++) { const auto record = data[i]; - struct ArrowDecimal decimal; - ArrowDecimalInit(&decimal, bitwidth, precision, scale); - ArrowDecimalSetBytes(&decimal, record.data()); - values.push_back(&decimal); + ArrowDecimalInit(&decimals[i], bitwidth, precision, scale); + ArrowDecimalSetBytes(&decimals[i], record.data()); + values.push_back(&decimals[i]); } - ASSERT_THAT(adbc_validation::MakeSchema(&schema.value, {{"col", type}}), adbc_validation::IsOkErrno()); + ASSERT_THAT(adbc_validation::MakeSchema(&schema.value, {{"col", type}}), + adbc_validation::IsOkErrno()); ASSERT_THAT(adbc_validation::MakeBatch(&schema.value, &array.value, - &na_error, values), adbc_validation::IsOkErrno()); + &na_error, values), + adbc_validation::IsOkErrno()); ASSERT_THAT(AdbcStatementSetOption(&statement_, - ADBC_INGEST_OPTION_TARGET_TABLE, - "bulk_ingest", &error_), + ADBC_INGEST_OPTION_TARGET_TABLE, + "bulk_ingest", &error_), IsOkStatus(&error_)); ASSERT_THAT(AdbcStatementBind(&statement_, &array.value, &schema.value, &error_), IsOkStatus(&error_)); - + int64_t rows_affected = 0; ASSERT_THAT(AdbcStatementExecuteQuery(&statement_, nullptr, &rows_affected, &error_), IsOkStatus(&error_)); @@ -1736,23 +1747,24 @@ TEST_P(PostgresDecimalTest, SelectValue) { { adbc_validation::StreamReader reader; ASSERT_THAT(AdbcStatementExecuteQuery(&statement_, &reader.stream.value, - &reader.rows_affected, &error_), + &reader.rows_affected, &error_), IsOkStatus(&error_)); ASSERT_THAT(reader.rows_affected, ::testing::AnyOf(::testing::Eq(values.size()), ::testing::Eq(-1))); ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); ArrowType round_trip_type = quirks_.IngestSelectRoundTripType(type); - ASSERT_NO_FATAL_FAILURE( - adbc_validation::CompareSchema(&reader.schema.value, {{"col", round_trip_type, true}})); + ASSERT_NO_FATAL_FAILURE(adbc_validation::CompareSchema(&reader.schema.value, + {{"col", + round_trip_type, true}})); ASSERT_NO_FATAL_FAILURE(reader.Next()); ASSERT_NE(nullptr, reader.array->release); ASSERT_EQ(values.size(), reader.array->length); ASSERT_EQ(1, reader.array->n_children); - ASSERT_NO_FATAL_FAILURE( - adbc_validation::CompareArray(reader.array_view->children[0], expected)); + ASSERT_NO_FATAL_FAILURE(adbc_validation::CompareArray< + std::string>(reader.array_view->children[0], expected)); ASSERT_NO_FATAL_FAILURE(reader.Next()); ASSERT_EQ(nullptr, reader.array->release); @@ -1766,10 +1778,11 @@ static std::initializer_list kDecimal128Cases = { 0x0, 0x18, 0x25, 0x20, 0xfd, 0xff, 0xff, 0xff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, }, }, {"-123.456"} }}; -INSTANTIATE_TEST_SUITE_P(Decimal128Tests, PostgresDecimalTest, testing::ValuesIn(kDecimal128Cases)); +INSTANTIATE_TEST_SUITE_P(Decimal128Tests, PostgresDecimalTest, + testing::ValuesIn(kDecimal128Cases)); From 0cf303f7193a8826071b5bd67083532a036fceaf Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 18 Dec 2023 16:31:34 -0500 Subject: [PATCH 19/30] Try parametrized postgres-test suite --- c/driver/postgresql/postgresql_test.cc | 67 +++++++-- c/validation/adbc_validation.cc | 193 ------------------------- c/validation/adbc_validation.h | 6 - 3 files changed, 58 insertions(+), 208 deletions(-) diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc index 38aba5d54b..a7de77ab90 100644 --- a/c/driver/postgresql/postgresql_test.cc +++ b/c/driver/postgresql/postgresql_test.cc @@ -1425,6 +1425,7 @@ TEST_P(PostgresTypeTest, SelectValue) { ASSERT_NE(nullptr, reader.array->release); ASSERT_FALSE(ArrowArrayViewIsNull(&reader.array_view.value, 0)); ASSERT_FALSE(ArrowArrayViewIsNull(reader.array_view->children[0], 0)); + // check value ASSERT_NO_FATAL_FAILURE(std::visit( [&](auto&& arg) -> void { @@ -1771,18 +1772,66 @@ TEST_P(PostgresDecimalTest, SelectValue) { } } -static std::initializer_list kDecimal128Cases = { - { - NANOARROW_TYPE_DECIMAL128, 38, 8, { +static std::vector> kDecimal128Data = { + // -12345600000 + { + 0x00, 0x18, 0x25, 0x20, 0xfd, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + // 1234 + { + 0xd2, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + // 100000000 + { + 0x00, 0xe1, 0xf5, 0x05, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + // 12345600000 + { + 0x00, 0xe8, 0xda, 0xdf, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + // 100000000000000 + { + 0x00, 0x40, 0x7a, 0x10, 0xf3, 0x5a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + // 2342394230592232349023094 { - 0x0, 0x18, 0x25, 0x20, 0xfd, 0xff, 0xff, 0xff, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x76, 0xbb, 0xc8, 0x2c, 0x1c, 0x2b, 0x18, 0x72, + 0x05, 0xf0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, - }, - {"-123.456"} +}; + +static std::initializer_list kDecimal128Cases = { + { + NANOARROW_TYPE_DECIMAL128, 38, 8, kDecimal128Data, + {"-123.456", "0.00001234", "1", "123.456", "1000000", + "23423942305922323.49023094"} + }}; + +static std::initializer_list kDecimal128NoScaleCases = { + { + NANOARROW_TYPE_DECIMAL128, 38, 0, kDecimal128Data, + {"-12345600000", "1234", "100000000", "12345600000", "100000000000000", + "2342394230592232349023094"} }}; INSTANTIATE_TEST_SUITE_P(Decimal128Tests, PostgresDecimalTest, testing::ValuesIn(kDecimal128Cases)); +INSTANTIATE_TEST_SUITE_P(Decimal128NoScale, PostgresDecimalTest, + testing::ValuesIn(kDecimal128NoScaleCases)); diff --git a/c/validation/adbc_validation.cc b/c/validation/adbc_validation.cc index c261f3c433..d30aa0a979 100644 --- a/c/validation/adbc_validation.cc +++ b/c/validation/adbc_validation.cc @@ -1528,199 +1528,6 @@ void StatementTest::TestSqlIngestFloat64() { ASSERT_NO_FATAL_FAILURE(TestSqlIngestNumericType(NANOARROW_TYPE_DOUBLE)); } -// For full coverage, ensure that this contains Decimal examples that: -// - Have >= four zeroes to the left of the decimal point -// - Have >= four zeroes to the right of the decimal point -// - Have >= four trailing zeroes to the right of the decimal point -// - Have >= four leading zeroes before the first digit to the right of the decimal point -// - Is < 0 (negative) -// - The arrow Decimal implementations do not support special values nan, ±inf -void StatementTest::TestSqlIngestDecimal128() { - if (!quirks()->supports_bulk_ingest(ADBC_INGEST_OPTION_MODE_CREATE)) { - GTEST_SKIP(); - } - - ASSERT_THAT(quirks()->DropTable(&connection, "bulk_ingest", &error), - IsOkStatus(&error)); - - Handle schema; - Handle array; - struct ArrowError na_error; - constexpr int32_t size = 128; - constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; - - struct ArrowDecimal decimal1; - struct ArrowDecimal decimal2; - struct ArrowDecimal decimal3; - struct ArrowDecimal decimal4; - struct ArrowDecimal decimal5; - struct ArrowDecimal decimal6; - - ArrowDecimalInit(&decimal1, size, 38, 8); - ArrowDecimalSetInt(&decimal1, -12345600000); - ArrowDecimalInit(&decimal2, size, 38, 8); - ArrowDecimalSetInt(&decimal2, 1234); - ArrowDecimalInit(&decimal3, size, 38, 8); - ArrowDecimalSetInt(&decimal3, 100000000); - ArrowDecimalInit(&decimal4, size, 38, 8); - ArrowDecimalSetInt(&decimal4, 12345600000); - ArrowDecimalInit(&decimal5, size, 38, 8); - ArrowDecimalSetInt(&decimal5, 100000000000000); - ArrowDecimalInit(&decimal6, size, 38, 8); - // 23423942305922323.49023094 in little endian - uint8_t le_data[16] = { - 0x76, 0xbb, 0xc8, 0x2c, 0x1c, 0x2b, 0x18, 0x72, - 0x05, 0xf0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00}; - ArrowDecimalSetBytes(&decimal6, le_data); - - const std::vector> values = { - std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5, &decimal6}; - - ASSERT_THAT(MakeSchema(&schema.value, {{"col", type}}), IsOkErrno()); - ASSERT_THAT(MakeBatch(&schema.value, &array.value, - &na_error, values), IsOkErrno()); - - ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); - ASSERT_THAT(AdbcStatementSetOption(&statement, ADBC_INGEST_OPTION_TARGET_TABLE, - "bulk_ingest", &error), - IsOkStatus(&error)); - ASSERT_THAT(AdbcStatementBind(&statement, &array.value, &schema.value, &error), - IsOkStatus(&error)); - - int64_t rows_affected = 0; - ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, &rows_affected, &error), - IsOkStatus(&error)); - ASSERT_THAT(rows_affected, - ::testing::AnyOf(::testing::Eq(values.size()), ::testing::Eq(-1))); - - ASSERT_THAT(AdbcStatementSetSqlQuery( - &statement, - "SELECT * FROM bulk_ingest ORDER BY \"col\" ASC NULLS FIRST", &error), - IsOkStatus(&error)); - { - StreamReader reader; - ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, - &reader.rows_affected, &error), - IsOkStatus(&error)); - ASSERT_THAT(reader.rows_affected, - ::testing::AnyOf(::testing::Eq(values.size()), ::testing::Eq(-1))); - - ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); - ArrowType round_trip_type = quirks()->IngestSelectRoundTripType(type); - ASSERT_NO_FATAL_FAILURE( - CompareSchema(&reader.schema.value, {{"col", round_trip_type, NULLABLE}})); - - ASSERT_NO_FATAL_FAILURE(reader.Next()); - ASSERT_NE(nullptr, reader.array->release); - ASSERT_EQ(values.size(), reader.array->length); - ASSERT_EQ(1, reader.array->n_children); - - const std::vector> str_values = { - std::nullopt, "-123.456", "0.00001234", "1", "123.456", "1000000", - "23423942305922323.49023094"}; - ASSERT_NO_FATAL_FAILURE( - CompareArray(reader.array_view->children[0], str_values)); - - ASSERT_NO_FATAL_FAILURE(reader.Next()); - ASSERT_EQ(nullptr, reader.array->release); - } - ASSERT_THAT(AdbcStatementRelease(&statement, &error), IsOkStatus(&error)); -} - -void StatementTest::TestSqlIngestDecimal256() { - if (!quirks()->supports_bulk_ingest(ADBC_INGEST_OPTION_MODE_CREATE)) { - GTEST_SKIP(); - } - - ASSERT_THAT(quirks()->DropTable(&connection, "bulk_ingest", &error), - IsOkStatus(&error)); - - Handle schema; - Handle array; - struct ArrowError na_error; - constexpr int32_t bitwidth = 256; - constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL256; - - struct ArrowDecimal decimal1; - struct ArrowDecimal decimal2; - struct ArrowDecimal decimal3; - struct ArrowDecimal decimal4; - struct ArrowDecimal decimal5; - struct ArrowDecimal decimal6; - - ArrowDecimalInit(&decimal1, bitwidth, 38, 8); - ArrowDecimalSetInt(&decimal1, -12345600000); - ArrowDecimalInit(&decimal2, bitwidth, 38, 8); - ArrowDecimalSetInt(&decimal2, 1234); - ArrowDecimalInit(&decimal3, bitwidth, 38, 8); - ArrowDecimalSetInt(&decimal3, 100000000); - ArrowDecimalInit(&decimal4, bitwidth, 38, 8); - ArrowDecimalSetInt(&decimal4, 12345600000); - ArrowDecimalInit(&decimal5, bitwidth, 38, 8); - ArrowDecimalSetInt(&decimal5, 100000000000000); - ArrowDecimalInit(&decimal6, bitwidth, 38, 8); - - uint64_t le_data64[4] = {17877984925544397504ULL, 5352188884907840935ULL, - 234631617561833724ULL, 196678011949953713ULL}; - uint8_t le_data[32]; - memcpy(le_data, le_data64, sizeof(le_data64)); - ArrowDecimalSetBytes(&decimal6, le_data); - - const std::vector> values = { - std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5, &decimal6}; - - ASSERT_THAT(MakeSchema(&schema.value, {{"col", type}}), IsOkErrno()); - ASSERT_THAT(MakeBatch(&schema.value, &array.value, - &na_error, values), IsOkErrno()); - - ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); - ASSERT_THAT(AdbcStatementSetOption(&statement, ADBC_INGEST_OPTION_TARGET_TABLE, - "bulk_ingest", &error), - IsOkStatus(&error)); - ASSERT_THAT(AdbcStatementBind(&statement, &array.value, &schema.value, &error), - IsOkStatus(&error)); - - int64_t rows_affected = 0; - ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, &rows_affected, &error), - IsOkStatus(&error)); - ASSERT_THAT(rows_affected, - ::testing::AnyOf(::testing::Eq(values.size()), ::testing::Eq(-1))); - - ASSERT_THAT(AdbcStatementSetSqlQuery( - &statement, - "SELECT * FROM bulk_ingest ORDER BY \"col\" ASC NULLS FIRST", &error), - IsOkStatus(&error)); - { - StreamReader reader; - ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, - &reader.rows_affected, &error), - IsOkStatus(&error)); - ASSERT_THAT(reader.rows_affected, - ::testing::AnyOf(::testing::Eq(values.size()), ::testing::Eq(-1))); - - ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); - ArrowType round_trip_type = quirks()->IngestSelectRoundTripType(type); - ASSERT_NO_FATAL_FAILURE( - CompareSchema(&reader.schema.value, {{"col", round_trip_type, NULLABLE}})); - - ASSERT_NO_FATAL_FAILURE(reader.Next()); - ASSERT_NE(nullptr, reader.array->release); - ASSERT_EQ(values.size(), reader.array->length); - ASSERT_EQ(1, reader.array->n_children); - - const std::vector> str_values = { - std::nullopt, "-123.456", "0.00001234", "1", "123.456", "1000000", - "12345678901234567890123456789012345678901234567890123456789012345678.90123456"}; - ASSERT_NO_FATAL_FAILURE( - CompareArray(reader.array_view->children[0], str_values)); - - ASSERT_NO_FATAL_FAILURE(reader.Next()); - ASSERT_EQ(nullptr, reader.array->release); - } - ASSERT_THAT(AdbcStatementRelease(&statement, &error), IsOkStatus(&error)); -} - - void StatementTest::TestSqlIngestString() { ASSERT_NO_FATAL_FAILURE(TestSqlIngestType( NANOARROW_TYPE_STRING, {std::nullopt, "", "", "1234", "例"}, false)); diff --git a/c/validation/adbc_validation.h b/c/validation/adbc_validation.h index 82b0eb6131..874d9a0584 100644 --- a/c/validation/adbc_validation.h +++ b/c/validation/adbc_validation.h @@ -328,10 +328,6 @@ class StatementTest { void TestSqlIngestFloat32(); void TestSqlIngestFloat64(); - // Decmial - void TestSqlIngestDecimal128(); - void TestSqlIngestDecimal256(); - // Strings void TestSqlIngestString(); void TestSqlIngestLargeString(); @@ -438,8 +434,6 @@ class StatementTest { TEST_F(FIXTURE, SqlIngestUInt64) { TestSqlIngestUInt64(); } \ TEST_F(FIXTURE, SqlIngestFloat32) { TestSqlIngestFloat32(); } \ TEST_F(FIXTURE, SqlIngestFloat64) { TestSqlIngestFloat64(); } \ - TEST_F(FIXTURE, SqlIngestDecimal128) { TestSqlIngestDecimal128(); } \ - TEST_F(FIXTURE, SqlIngestDecimal256) { TestSqlIngestDecimal256(); } \ TEST_F(FIXTURE, SqlIngestString) { TestSqlIngestString(); } \ TEST_F(FIXTURE, SqlIngestLargeString) { TestSqlIngestLargeString(); } \ TEST_F(FIXTURE, SqlIngestBinary) { TestSqlIngestBinary(); } \ From 59cdb220b89a63e4809ce9e8cf2371107f64c21e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 18 Dec 2023 17:02:04 -0500 Subject: [PATCH 20/30] fix test precision / scale arguments --- c/driver/postgresql/postgresql_test.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc index a7de77ab90..b42949dd3a 100644 --- a/c/driver/postgresql/postgresql_test.cc +++ b/c/driver/postgresql/postgresql_test.cc @@ -1721,8 +1721,12 @@ TEST_P(PostgresDecimalTest, SelectValue) { values.push_back(&decimals[i]); } - ASSERT_THAT(adbc_validation::MakeSchema(&schema.value, {{"col", type}}), - adbc_validation::IsOkErrno()); + ArrowSchemaInit(&schema.value); + ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); + ASSERT_EQ(AdbcNsArrowSchemaSetTypeDecimal(schema.value.children[0], + type, precision, scale), 0); + ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); + ASSERT_THAT(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, values), adbc_validation::IsOkErrno()); From 759b0f10bd031a18c335799a9dda94f419517ba1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 18 Dec 2023 17:08:09 -0500 Subject: [PATCH 21/30] add nullability testing --- c/driver/postgresql/postgresql_test.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc index b42949dd3a..9e94406793 100644 --- a/c/driver/postgresql/postgresql_test.cc +++ b/c/driver/postgresql/postgresql_test.cc @@ -1713,7 +1713,6 @@ TEST_P(PostgresDecimalTest, SelectValue) { } std::vector> values; - //values.push_back(std::nullopt); for (size_t i = 0; i < nrecords; i++) { const auto record = data[i]; ArrowDecimalInit(&decimals[i], bitwidth, precision, scale); @@ -1721,6 +1720,10 @@ TEST_P(PostgresDecimalTest, SelectValue) { values.push_back(&decimals[i]); } + auto expected_with_null{expected}; + expected_with_null.insert(expected_with_null.begin(), std::nullopt); + values.push_back(std::nullopt); + ArrowSchemaInit(&schema.value); ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); ASSERT_EQ(AdbcNsArrowSchemaSetTypeDecimal(schema.value.children[0], @@ -1769,7 +1772,8 @@ TEST_P(PostgresDecimalTest, SelectValue) { ASSERT_EQ(1, reader.array->n_children); ASSERT_NO_FATAL_FAILURE(adbc_validation::CompareArray< - std::string>(reader.array_view->children[0], expected)); + std::string>(reader.array_view->children[0], + expected_with_null)); ASSERT_NO_FATAL_FAILURE(reader.Next()); ASSERT_EQ(nullptr, reader.array->release); From 9472ff5923527ee77dc2b6f9c6ee0f6d0d4fe9f1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 18 Dec 2023 17:23:00 -0500 Subject: [PATCH 22/30] decimal256 test cases (but failing) --- c/driver/postgresql/postgresql_test.cc | 63 ++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 3 deletions(-) diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc index 9e94406793..91cdf5da2b 100644 --- a/c/driver/postgresql/postgresql_test.cc +++ b/c/driver/postgresql/postgresql_test.cc @@ -1780,7 +1780,7 @@ TEST_P(PostgresDecimalTest, SelectValue) { } } -static std::vector> kDecimal128Data = { +static std::vector> kDecimalData = { // -12345600000 { 0x00, 0x18, 0x25, 0x20, 0xfd, 0xff, 0xff, 0xff, @@ -1825,21 +1825,78 @@ static std::vector> kDecimal128Data = { }, }; +static std::vector> kDecimal256Data = { + // 1234567890123456789012345678901234567890123456789012345678901234567890123456 + { + 0xf8, 0x1b, 0x5c, 0x91, 0x72, 0xdc, 0xba, 0xc0, + 0x4a, 0x46, 0xcb, 0x6f, 0x07, 0xb9, 0xd5, 0xa7, + 0x03, 0x41, 0x94, 0x36, 0x89, 0xd9, 0x30, 0xfc, + 0x02, 0xba, 0xbd, 0x9c, 0x1d, 0x68, 0x32, 0xb1, + }, + // -1234567890123456789012345678901234567890123456789012345678901234567890123456 + { + 0x07, 0xe4, 0xa3, 0x6e, 0x8d, 0x23, 0x45, 0x40, + 0xb5, 0xb9, 0x34, 0x90, 0xf8, 0x46, 0x2a, 0x58, + 0xfc, 0xbe, 0x6b, 0xc9, 0x76, 0x26, 0xcf, 0x03, + 0xfd, 0x45, 0x42, 0x63, 0xe2, 0x97, 0xcd, 0x4e, + }, +}; + static std::initializer_list kDecimal128Cases = { { - NANOARROW_TYPE_DECIMAL128, 38, 8, kDecimal128Data, + NANOARROW_TYPE_DECIMAL128, 38, 8, kDecimalData, {"-123.456", "0.00001234", "1", "123.456", "1000000", "23423942305922323.49023094"} }}; static std::initializer_list kDecimal128NoScaleCases = { { - NANOARROW_TYPE_DECIMAL128, 38, 0, kDecimal128Data, + NANOARROW_TYPE_DECIMAL128, 38, 0, kDecimalData, + {"-12345600000", "1234", "100000000", "12345600000", "100000000000000", + "2342394230592232349023094"} + }}; + +static std::initializer_list kDecimal256Cases = { + { + NANOARROW_TYPE_DECIMAL256, 38, 8, kDecimalData, + {"-123.456", "0.00001234", "1", "123.456", "1000000", + "23423942305922323.49023094"} + }}; + +static std::initializer_list kDecimal256NoScaleCases = { + { + NANOARROW_TYPE_DECIMAL256, 38, 0, kDecimalData, {"-12345600000", "1234", "100000000", "12345600000", "100000000000000", "2342394230592232349023094"} }}; +static std::initializer_list kDecimal256LargeCases = { + { + NANOARROW_TYPE_DECIMAL256, 76, 8, kDecimal256Data, + { + "12345678901234567890123456789012345678901234567890123456789012345678.90123456", + "-12345678901234567890123456789012345678901234567890123456789012345678.90123456", + } + }}; + +static std::initializer_list kDecimal256LargeNoScaleCases = { + { + NANOARROW_TYPE_DECIMAL256, 76, 0, kDecimal256Data, + { + "12345678901234567890123456789012345678901234567890123456789012345678.90123456", + "-12345678901234567890123456789012345678901234567890123456789012345678.90123456", + } + }}; + INSTANTIATE_TEST_SUITE_P(Decimal128Tests, PostgresDecimalTest, testing::ValuesIn(kDecimal128Cases)); INSTANTIATE_TEST_SUITE_P(Decimal128NoScale, PostgresDecimalTest, testing::ValuesIn(kDecimal128NoScaleCases)); +INSTANTIATE_TEST_SUITE_P(Decimal256Tests, PostgresDecimalTest, + testing::ValuesIn(kDecimal128Cases)); +INSTANTIATE_TEST_SUITE_P(Decimal256NoScale, PostgresDecimalTest, + testing::ValuesIn(kDecimal128NoScaleCases)); +INSTANTIATE_TEST_SUITE_P(Decimal256LargeTests, PostgresDecimalTest, + testing::ValuesIn(kDecimal256LargeCases)); +INSTANTIATE_TEST_SUITE_P(Decimal256LargeNoScale, PostgresDecimalTest, + testing::ValuesIn(kDecimal256LargeNoScaleCases)); From 0eba157d3efded0c56307cc6d929948bead42a80 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 18 Dec 2023 17:31:49 -0500 Subject: [PATCH 23/30] passing DECIMAL256 tests --- c/driver/postgresql/postgresql_test.cc | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc index 91cdf5da2b..8461c496f5 100644 --- a/c/driver/postgresql/postgresql_test.cc +++ b/c/driver/postgresql/postgresql_test.cc @@ -1828,17 +1828,17 @@ static std::vector> kDecimalData = { static std::vector> kDecimal256Data = { // 1234567890123456789012345678901234567890123456789012345678901234567890123456 { - 0xf8, 0x1b, 0x5c, 0x91, 0x72, 0xdc, 0xba, 0xc0, - 0x4a, 0x46, 0xcb, 0x6f, 0x07, 0xb9, 0xd5, 0xa7, - 0x03, 0x41, 0x94, 0x36, 0x89, 0xd9, 0x30, 0xfc, - 0x02, 0xba, 0xbd, 0x9c, 0x1d, 0x68, 0x32, 0xb1, + 0xc0, 0xba, 0xdc, 0x72, 0x91, 0x5c, 0x1b, 0xf8, + 0xa7, 0xd5, 0xb9, 0x07, 0x6f, 0xcb, 0x46, 0x4a, + 0xfc, 0x30, 0xd9, 0x89, 0x36, 0x94, 0x41, 0x03, + 0xb1, 0x32, 0x68, 0x1d, 0x9c, 0xbd, 0xba, 0x02, }, // -1234567890123456789012345678901234567890123456789012345678901234567890123456 { - 0x07, 0xe4, 0xa3, 0x6e, 0x8d, 0x23, 0x45, 0x40, - 0xb5, 0xb9, 0x34, 0x90, 0xf8, 0x46, 0x2a, 0x58, - 0xfc, 0xbe, 0x6b, 0xc9, 0x76, 0x26, 0xcf, 0x03, - 0xfd, 0x45, 0x42, 0x63, 0xe2, 0x97, 0xcd, 0x4e, + 0x40, 0x45, 0x23, 0x8d, 0x6e, 0xa3, 0xe4, 0x07, + 0x58, 0x2a, 0x46, 0xf8, 0x90, 0x34, 0xb9, 0xb5, + 0x03, 0xcf, 0x26, 0x76, 0xc9, 0x6b, 0xbe, 0xfc, + 0x4e, 0xcd, 0x97, 0xe2, 0x63, 0x42, 0x45, 0xfd, }, }; @@ -1874,8 +1874,8 @@ static std::initializer_list kDecimal256LargeCases = { { NANOARROW_TYPE_DECIMAL256, 76, 8, kDecimal256Data, { - "12345678901234567890123456789012345678901234567890123456789012345678.90123456", "-12345678901234567890123456789012345678901234567890123456789012345678.90123456", + "12345678901234567890123456789012345678901234567890123456789012345678.90123456", } }}; @@ -1883,8 +1883,8 @@ static std::initializer_list kDecimal256LargeNoScaleCases = { { NANOARROW_TYPE_DECIMAL256, 76, 0, kDecimal256Data, { - "12345678901234567890123456789012345678901234567890123456789012345678.90123456", - "-12345678901234567890123456789012345678901234567890123456789012345678.90123456", + "-1234567890123456789012345678901234567890123456789012345678901234567890123456", + "1234567890123456789012345678901234567890123456789012345678901234567890123456", } }}; From 97c2d5cf9b9831bd06fc8c8c9e7188bfa65d1dbe Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 18 Dec 2023 17:41:49 -0500 Subject: [PATCH 24/30] lint --- c/driver/postgresql/postgres_copy_reader.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index ecfdde4ba1..b9c883133d 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -1294,7 +1294,7 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, dscale, error)); const size_t pg_digit_bytes = sizeof(int16_t) * pg_digits.size(); - NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, pg_digit_bytes)); + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, pg_digit_bytes)); for (auto pg_digit : pg_digits) { WriteUnsafe(buffer, pg_digit); } @@ -1356,7 +1356,8 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { static constexpr uint16_t kNumericPos = 0x0000; static constexpr uint16_t kNumericNeg = 0x4000; static constexpr int32_t bitwidth_ = (T == NANOARROW_TYPE_DECIMAL128) ? 128 : 256; - static constexpr size_t max_decimal_digits_ = (T == NANOARROW_TYPE_DECIMAL128) ? 39 : 78; + static constexpr size_t max_decimal_digits_ = + (T == NANOARROW_TYPE_DECIMAL128) ? 39 : 78; const int32_t precision_; const int32_t scale_; }; From 443efedffa08b97f0863e2e4a6a958b941c493ab Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 18 Dec 2023 17:57:43 -0500 Subject: [PATCH 25/30] endian agnosticism --- c/driver/postgresql/postgresql_test.cc | 86 ++++++++------------------ 1 file changed, 25 insertions(+), 61 deletions(-) diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc index 8461c496f5..56f104ae8d 100644 --- a/c/driver/postgresql/postgresql_test.cc +++ b/c/driver/postgresql/postgresql_test.cc @@ -1635,7 +1635,7 @@ struct DecimalTestCase { const enum ArrowType type; const int32_t precision; const int32_t scale; - const std::vector> data; + const std::vector> data; const std::vector> expected; }; @@ -1714,9 +1714,11 @@ TEST_P(PostgresDecimalTest, SelectValue) { std::vector> values; for (size_t i = 0; i < nrecords; i++) { - const auto record = data[i]; ArrowDecimalInit(&decimals[i], bitwidth, precision, scale); - ArrowDecimalSetBytes(&decimals[i], record.data()); + uint8_t buf[32]; + const auto record = data[i]; + memcpy(buf, record.data(), sizeof(buf)); + ArrowDecimalSetBytes(&decimals[i], buf); values.push_back(&decimals[i]); } @@ -1780,66 +1782,28 @@ TEST_P(PostgresDecimalTest, SelectValue) { } } -static std::vector> kDecimalData = { - // -12345600000 - { - 0x00, 0x18, 0x25, 0x20, 0xfd, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - }, - // 1234 - { - 0xd2, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - }, - // 100000000 - { - 0x00, 0xe1, 0xf5, 0x05, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - }, - // 12345600000 - { - 0x00, 0xe8, 0xda, 0xdf, 0x02, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - }, - // 100000000000000 - { - 0x00, 0x40, 0x7a, 0x10, 0xf3, 0x5a, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - }, - // 2342394230592232349023094 - { - 0x76, 0xbb, 0xc8, 0x2c, 0x1c, 0x2b, 0x18, 0x72, - 0x05, 0xf0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - }, +static std::vector> kDecimalData = { + // -12345600000 + {18446744061363951616ULL, 18446744073709551615ULL, 0, 0}, + // 1234 + {1234ULL, 0, 0, 0}, + // 100000000 + {100000000ULL, 0, 0, 0}, + // 12345600000 + {12345600000ULL, 0, 0, 0}, + // 100000000000000 + {100000000000000ULL, 0, 0, 0}, + // 2342394230592232349023094 + {8221368519775271798ULL, 126981ULL, 0, 0}, }; -static std::vector> kDecimal256Data = { - // 1234567890123456789012345678901234567890123456789012345678901234567890123456 - { - 0xc0, 0xba, 0xdc, 0x72, 0x91, 0x5c, 0x1b, 0xf8, - 0xa7, 0xd5, 0xb9, 0x07, 0x6f, 0xcb, 0x46, 0x4a, - 0xfc, 0x30, 0xd9, 0x89, 0x36, 0x94, 0x41, 0x03, - 0xb1, 0x32, 0x68, 0x1d, 0x9c, 0xbd, 0xba, 0x02, - }, - // -1234567890123456789012345678901234567890123456789012345678901234567890123456 - { - 0x40, 0x45, 0x23, 0x8d, 0x6e, 0xa3, 0xe4, 0x07, - 0x58, 0x2a, 0x46, 0xf8, 0x90, 0x34, 0xb9, 0xb5, - 0x03, 0xcf, 0x26, 0x76, 0xc9, 0x6b, 0xbe, 0xfc, - 0x4e, 0xcd, 0x97, 0xe2, 0x63, 0x42, 0x45, 0xfd, - }, +static std::vector> kDecimal256Data = { + // 1234567890123456789012345678901234567890123456789012345678901234567890123456 + {17877984925544397504ULL, 5352188884907840935ULL, 234631617561833724ULL, + 196678011949953713ULL}, + // -1234567890123456789012345678901234567890123456789012345678901234567890123456 + {568759148165154112ULL, 13094555188801710680ULL, 18212112456147717891ULL, + 18250066061759597902ULL}, }; static std::initializer_list kDecimal128Cases = { From 5a93f9ed5398c33a901bc482cd7cc62e31639b30 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 18 Dec 2023 18:08:01 -0500 Subject: [PATCH 26/30] fixups --- c/driver/postgresql/postgresql_test.cc | 4 ++-- c/validation/adbc_validation_util.cc | 20 +------------------- 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc index 56f104ae8d..901883828d 100644 --- a/c/driver/postgresql/postgresql_test.cc +++ b/c/driver/postgresql/postgresql_test.cc @@ -1700,7 +1700,7 @@ TEST_P(PostgresDecimalTest, SelectValue) { bitwidth = 256; break; default: - ASSERT_TRUE(false) << " unsupported type for decimal parametrization"; + FAIL(); } // this is a bit of a hack to make std::vector play nicely with @@ -1708,7 +1708,7 @@ TEST_P(PostgresDecimalTest, SelectValue) { constexpr size_t max_decimals = 10; struct ArrowDecimal decimals[max_decimals]; if (nrecords > max_decimals) { - ASSERT_TRUE(false) << + FAIL() << " max_decimals exceeded for test case - please change parametrization"; } diff --git a/c/validation/adbc_validation_util.cc b/c/validation/adbc_validation_util.cc index ade8cf0995..b3ca7d5e9d 100644 --- a/c/validation/adbc_validation_util.cc +++ b/c/validation/adbc_validation_util.cc @@ -141,25 +141,7 @@ int MakeSchema(struct ArrowSchema* schema, const std::vector& field CHECK_ERRNO(ArrowSchemaSetTypeStruct(schema, fields.size())); size_t i = 0; for (const SchemaField& field : fields) { - switch (field.type) { - case NANOARROW_TYPE_DECIMAL128: - // TODO: how can we avoid hard-coding precision and scale? - CHECK_ERRNO(AdbcNsArrowSchemaSetTypeDecimal(schema->children[i], - field.type, - 38, - 8)); - break; - case NANOARROW_TYPE_DECIMAL256: - // TODO: how can we avoid hard-coding precision and scale? - CHECK_ERRNO(AdbcNsArrowSchemaSetTypeDecimal(schema->children[i], - field.type, - 76, - 8)); - break; - default: - CHECK_ERRNO(ArrowSchemaSetType(schema->children[i], field.type)); - } - + CHECK_ERRNO(ArrowSchemaSetType(schema->children[i], field.type)); CHECK_ERRNO(ArrowSchemaSetName(schema->children[i], field.name.c_str())); if (!field.nullable) { schema->children[i]->flags &= ~ARROW_FLAG_NULLABLE; From b629aca951945f348b55927127394b4fd0bf337b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 22 Dec 2023 09:07:33 -0500 Subject: [PATCH 27/30] msvc compat? --- c/driver/postgresql/postgresql_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc index 901883828d..eb5d24c384 100644 --- a/c/driver/postgresql/postgresql_test.cc +++ b/c/driver/postgresql/postgresql_test.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include From f5100d0b2b6434b0f707348405838c522f40278b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 22 Dec 2023 09:22:59 -0500 Subject: [PATCH 28/30] fix COPY test --- c/driver/postgresql/postgres_copy_reader_test.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_reader_test.cc b/c/driver/postgresql/postgres_copy_reader_test.cc index 354a02bf00..6dc67d874f 100644 --- a/c/driver/postgresql/postgres_copy_reader_test.cc +++ b/c/driver/postgresql/postgres_copy_reader_test.cc @@ -699,8 +699,10 @@ TEST(PostgresCopyUtilsTest, PostgresCopyWriteNumeric) { adbc_validation::Handle schema; adbc_validation::Handle array; struct ArrowError na_error; - constexpr int32_t size = 128; constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + constexpr int32_t size = 128; + constexpr int32_t precision = 38; + constexpr int32_t scale = 8; struct ArrowDecimal decimal1; struct ArrowDecimal decimal2; @@ -722,8 +724,11 @@ TEST(PostgresCopyUtilsTest, PostgresCopyWriteNumeric) { const std::vector> values = { std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5}; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", type}}), - ADBC_STATUS_OK); + ArrowSchemaInit(&schema.value); + ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); + ASSERT_EQ(AdbcNsArrowSchemaSetTypeDecimal(schema.value.children[0], + type, precision, scale), 0); + ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, values), ADBC_STATUS_OK); From 7e7351d739963d88145bd946b95d08bce86ef826 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 22 Dec 2023 11:01:11 -0500 Subject: [PATCH 29/30] Simple benchmark --- c/driver/postgresql/postgresql_benchmark.cc | 152 ++++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/c/driver/postgresql/postgresql_benchmark.cc b/c/driver/postgresql/postgresql_benchmark.cc index 239575a7d7..908269966e 100644 --- a/c/driver/postgresql/postgresql_benchmark.cc +++ b/c/driver/postgresql/postgresql_benchmark.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include @@ -175,5 +176,156 @@ static void BM_PostgresqlExecute(benchmark::State& state) { &error)); } +static void BM_PostgresqlDecimalWrite(benchmark::State& state) { + const char* uri = std::getenv("ADBC_POSTGRESQL_TEST_URI"); + if (!uri || !strcmp(uri, "")) { + state.SkipWithError("ADBC_POSTGRESQL_TEST_URI not set!"); + return; + } + adbc_validation::Handle database; + struct AdbcError error; + + ADBC_BENCHMARK_RETURN_NOT_OK(AdbcDatabaseNew(&database.value, &error)); + ADBC_BENCHMARK_RETURN_NOT_OK(AdbcDatabaseSetOption(&database.value, + "uri", + uri, + &error)); + ADBC_BENCHMARK_RETURN_NOT_OK(AdbcDatabaseInit(&database.value, &error)); + + adbc_validation::Handle connection; + ADBC_BENCHMARK_RETURN_NOT_OK(AdbcConnectionNew(&connection.value, &error)); + ADBC_BENCHMARK_RETURN_NOT_OK(AdbcConnectionInit(&connection.value, + &database.value, + &error)); + + adbc_validation::Handle statement; + ADBC_BENCHMARK_RETURN_NOT_OK(AdbcStatementNew(&connection.value, + &statement.value, + &error)); + + const char* drop_query = "DROP TABLE IF EXISTS adbc_postgresql_ingest_benchmark"; + ADBC_BENCHMARK_RETURN_NOT_OK(AdbcStatementSetSqlQuery(&statement.value, + drop_query, + &error)); + + ADBC_BENCHMARK_RETURN_NOT_OK(AdbcStatementExecuteQuery(&statement.value, + nullptr, + nullptr, + &error)); + + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + constexpr int32_t bitwidth = 128; + constexpr int32_t precision = 38; + constexpr int32_t scale = 8; + constexpr size_t ncols = 5; + ArrowSchemaInit(&schema.value); + if (ArrowSchemaSetTypeStruct(&schema.value, ncols) != NANOARROW_OK) { + state.SkipWithError("Call to ArrowSchemaSetTypeStruct failed!"); + error.release(&error); + return; + } + + for (size_t i = 0; i < ncols; i++) { + if (AdbcNsArrowSchemaSetTypeDecimal(schema.value.children[i], + type, precision, scale) != NANOARROW_OK) { + state.SkipWithError("Call to ArrowSchemaSetTypeDecimal failed!"); + error.release(&error); + return; + } + + std::string colname = "col" + std::to_string(i); + if (ArrowSchemaSetName(schema.value.children[i], colname.c_str()) != NANOARROW_OK) { + state.SkipWithError("Call to ArrowSchemaSetName failed!"); + error.release(&error); + return; + } + } + if (ArrowArrayInitFromSchema(&array.value, &schema.value, &na_error) != NANOARROW_OK) { + state.SkipWithError("Call to ArrowArrayInitFromSchema failed!"); + error.release(&error); + return; + } + + if (ArrowArrayStartAppending(&array.value) != NANOARROW_OK) { + state.SkipWithError("Call to ArrowArrayStartAppending failed!"); + error.release(&error); + return; + } + + constexpr size_t nrows = 1000; + struct ArrowDecimal decimal; + ArrowDecimalInit(&decimal, bitwidth, precision, scale); + for (size_t i = 0; i < nrows; i++) { + for (size_t j = 0; j < ncols; j++) { + ArrowDecimalSetInt(&decimal, i + j); + if (ArrowArrayAppendDecimal(array.value.children[j], &decimal) != NANOARROW_OK) { + state.SkipWithError("Call to ArrowArrayAppendDecimal failed"); + error.release(&error); + return; + } + } + } + + for (int64_t i = 0; i < array.value.n_children; i++) { + array.value.children[i]->length = nrows; + } + array.value.length = nrows; + + if (ArrowArrayFinishBuildingDefault(&array.value, &na_error) != NANOARROW_OK) { + state.SkipWithError("Call to ArrowArrayFinishBuildingDefault failed"); + error.release(&error); + return; + } + + const char* create_query = + "CREATE TABLE adbc_postgresql_ingest_benchmark (col0 DECIMAL(38, 8), " + "col1 DECIMAL(38, 8), col2 DECIMAL(38, 8), col3 DECIMAL(38, 8), col4 DECIMAL(38, 8))"; + + ADBC_BENCHMARK_RETURN_NOT_OK(AdbcStatementSetSqlQuery(&statement.value, + create_query, + &error)); + + ADBC_BENCHMARK_RETURN_NOT_OK(AdbcStatementExecuteQuery(&statement.value, + nullptr, + nullptr, + &error)); + + adbc_validation::Handle insert_stmt; + ADBC_BENCHMARK_RETURN_NOT_OK(AdbcStatementNew(&connection.value, + &insert_stmt.value, + &error)); + + ADBC_BENCHMARK_RETURN_NOT_OK(AdbcStatementSetOption(&insert_stmt.value, + ADBC_INGEST_OPTION_TARGET_TABLE, + "adbc_postgresql_ingest_benchmark", + &error)); + + ADBC_BENCHMARK_RETURN_NOT_OK(AdbcStatementSetOption(&insert_stmt.value, + ADBC_INGEST_OPTION_MODE, + ADBC_INGEST_OPTION_MODE_APPEND, + &error)); + + for (auto _ : state) { + AdbcStatementBind(&insert_stmt.value, &array.value, &schema.value, &error); + AdbcStatementExecuteQuery(&insert_stmt.value, nullptr, nullptr, &error); + } + + ADBC_BENCHMARK_RETURN_NOT_OK(AdbcStatementSetSqlQuery(&statement.value, + drop_query, + &error)); + + ADBC_BENCHMARK_RETURN_NOT_OK(AdbcStatementExecuteQuery(&statement.value, + nullptr, + nullptr, + &error)); +} + +// TODO: we are limited to only 1 iteration as AdbcStatementBind is part of +// the benchmark loop, but releases the array when it is done BENCHMARK(BM_PostgresqlExecute)->Iterations(1); +BENCHMARK(BM_PostgresqlDecimalWrite)->Iterations(1); BENCHMARK_MAIN(); From dc1b73523eff6be147aab08eb64557245d0cfc2c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 22 Dec 2023 11:40:54 -0500 Subject: [PATCH 30/30] return int instead of void --- c/driver/postgresql/postgres_copy_reader.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index b9c883133d..99ddaec8c0 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -1241,8 +1241,7 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { bool truncating_trailing_zeros = true; char decimal_string[max_decimal_digits_ + 1]; - DecimalToString(&decimal, decimal_string); - int digits_remaining = std::strlen(decimal_string); + int digits_remaining = DecimalToString(&decimal, decimal_string); do { const int start_pos = digits_remaining < kDecDigits ? 0 : digits_remaining - kDecDigits; @@ -1303,9 +1302,9 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { } private: - // TODO: maybe we should return strlen here + // returns the length of the string template - void DecimalToString(struct ArrowDecimal* decimal, char* out) { + int DecimalToString(struct ArrowDecimal* decimal, char* out) { constexpr size_t nwords = (DEC_WIDTH == 128) ? 2 : 4; uint8_t tmp[DEC_WIDTH / 8]; ArrowDecimalGetBytes(decimal, tmp); @@ -1348,9 +1347,11 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { p++; } - const size_t ndigits = sizeof(s) - (p - s); + const size_t ndigits = sizeof(s) - 1 - (p - s); std::memcpy(out, p, ndigits); out[ndigits] = '\0'; + + return ndigits; } static constexpr uint16_t kNumericPos = 0x0000;