-
Notifications
You must be signed in to change notification settings - Fork 95
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(c/driver/postgresql): Support for writing DECIMAL types #1288
Changes from 4 commits
a24b046
dbddd8b
c5dfd05
a091bcd
61eb3cc
a76ab65
078de30
bf8ed7b
75cbd58
94bf657
4b49999
c046632
3957b6d
c5d19bb
ba44774
06b6349
bc19709
10e6e09
e9967a7
6a0d3c9
df7ba3e
ac733bf
0cf303f
59cdb22
759b0f1
9472ff5
0eba157
97c2d5c
443efed
5a93f9e
b629aca
f5100d0
7e7351d
dc1b735
cc252cd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1217,6 +1217,77 @@ class PostgresCopyIntervalFieldWriter : public PostgresCopyFieldWriter { | |
} | ||
}; | ||
|
||
|
||
// Inspiration for this taken from get_str_from_var in the pg source | ||
// src/backend/utils/adt/numeric.c | ||
class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { | ||
public: | ||
ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { | ||
struct ArrowDecimal decimal; | ||
// TODO: these need to be inferred from the schema not hard coded | ||
constexpr int16_t precision = 19; | ||
constexpr int16_t scale = 8; | ||
ArrowDecimalInit(&decimal, 128, precision, scale); | ||
ArrowArrayViewGetDecimalUnsafe(array_view_, index, &decimal); | ||
constexpr uint16_t kNumericPos = 0x0000; | ||
constexpr uint16_t kNumericNeg = 0x4000; | ||
constexpr int64_t kNBase = 10000; | ||
// Number of decimal digits per Postgres digit | ||
constexpr int kDecDigits = 4; | ||
|
||
// TODO: need some kind of bounds check on this | ||
int64_t decimal_int = ArrowDecimalGetIntUnsafe(&decimal); | ||
// TODO: is -INT64_MIN possible? If so how do we handle? | ||
if (decimal_int < 0) { | ||
decimal_int = -decimal_int; | ||
} | ||
std::vector<int16_t> pg_digits; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. probably too small of an optimization to matter, but in principle you should be able to put an upper bound on the number of digits needed to represent an Arrow decimal, and then just stack-allocate? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yea that's true and actually how postgres does it internally. If we wanted to stack allocate I guess would just expand that out to whatever is required to store up to 4 decimal words? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, decimal128 would be 38 digits and decimal256 would be 76 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK cool. Shouldn't be too hard to switch to that - just need to figure out how to handle once I get multi-word decimals supported |
||
|
||
int16_t weight = -1; | ||
constexpr size_t nloops = (precision + kDecDigits - 1) / kDecDigits; | ||
for (size_t i = 0; i < nloops; i++) { | ||
const int64_t rem = decimal_int % kNBase; | ||
// TODO: postgres seems to pack records to the left of a decimal place | ||
// internally, so 1000000.0 would be sent as one digit of 100 with | ||
// a weight of 1 (there are weight + 1 pg digits to the left of a decimal place) | ||
// Here we still send two digits of 100 and 0000 | ||
pg_digits.insert(pg_digits.begin(), rem); | ||
|
||
// TODO: how does pg deal with words when integer and decimal part are sent | ||
// in same word? | ||
decimal_int /= kNBase; | ||
if (i >= scale / kDecDigits) { | ||
weight++; | ||
if (decimal_int == 0) { | ||
break; | ||
} | ||
} | ||
} | ||
|
||
int16_t ndigits = pg_digits.size(); | ||
const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? kNumericPos : kNumericNeg; | ||
const int16_t dscale = scale; | ||
|
||
int32_t field_size_bytes = sizeof(ndigits) | ||
+ sizeof(weight) | ||
+ sizeof(sign) | ||
+ sizeof(dscale) | ||
+ ndigits * sizeof(int16_t); | ||
|
||
NANOARROW_RETURN_NOT_OK(WriteChecked<int32_t>(buffer, field_size_bytes, error)); | ||
NANOARROW_RETURN_NOT_OK(WriteChecked<int16_t>(buffer, ndigits, error)); | ||
NANOARROW_RETURN_NOT_OK(WriteChecked<int16_t>(buffer, weight, error)); | ||
NANOARROW_RETURN_NOT_OK(WriteChecked<int16_t>(buffer, sign, error)); | ||
NANOARROW_RETURN_NOT_OK(WriteChecked<int16_t>(buffer, dscale, error)); | ||
|
||
for (auto pg_digit : pg_digits) { | ||
NANOARROW_RETURN_NOT_OK(WriteChecked<int16_t>(buffer, pg_digit, error)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. presumably you could check once then memcpy the digits over There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (again, possibly the compiler already does this) |
||
} | ||
|
||
return ADBC_STATUS_OK; | ||
} | ||
}; | ||
|
||
template <enum ArrowTimeUnit TU> | ||
class PostgresCopyDurationFieldWriter : public PostgresCopyFieldWriter { | ||
public: | ||
|
@@ -1379,6 +1450,9 @@ static inline ArrowErrorCode MakeCopyFieldWriter(struct ArrowSchema* schema, | |
case NANOARROW_TYPE_DOUBLE: | ||
*out = new PostgresCopyDoubleFieldWriter(); | ||
return NANOARROW_OK; | ||
case NANOARROW_TYPE_DECIMAL128: | ||
*out = new PostgresCopyNumericFieldWriter(); | ||
return NANOARROW_OK; | ||
case NANOARROW_TYPE_BINARY: | ||
case NANOARROW_TYPE_STRING: | ||
case NANOARROW_TYPE_LARGE_STRING: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this is ultimately correct. I think ideally we would just use the bytes backing the Decimal object, but I haven't yet figured out how that all gets managed when multiple words are required