From 1dc407d7105379a819ab48386ac5125e46e3c5d4 Mon Sep 17 00:00:00 2001 From: Matthew Pope <81593196+popematt@users.noreply.github.com> Date: Mon, 20 Nov 2023 13:46:03 -0800 Subject: [PATCH] Updates write support for binary Ion 1.1 decimals and timestamps (#644) --- .../amazon/ion/impl/bin/IonEncoder_1_1.java | 70 +++++---- .../com/amazon/ion/impl/bin/WriteBuffer.java | 12 ++ .../ion/impl/bin/IonEncoder_1_1Test.java | 141 +++++++++--------- 3 files changed, 126 insertions(+), 97 deletions(-) diff --git a/src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java b/src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java index 7b1ca9d4b2..df04fb3b9c 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java +++ b/src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java @@ -169,39 +169,43 @@ public static int writeDecimalValue(WriteBuffer buffer, final BigDecimal value) } int exponent = -value.scale(); - - if (BigDecimal.ZERO.compareTo(value) == 0 && !Decimal.isNegativeZero(value)) { - if (exponent == 0) { + int numExponentBytes = WriteBuffer.flexIntLength(exponent); + + byte[] coefficientBytes = null; + int numCoefficientBytes; + if (BigDecimal.ZERO.compareTo(value) == 0) { + if (Decimal.isNegativeZero(value)) { + numCoefficientBytes = 1; + } else if (exponent == 0) { buffer.writeByte(OpCodes.DECIMAL_ZERO_LENGTH); return 1; } else { - // A decimal with a coefficient of +0 is encoded using opcode 6F. - // The opcode is followed by a FlexInt representing the exponent. - buffer.writeByte(OpCodes.POSITIVE_ZERO_DECIMAL); - return 1 + buffer.writeFlexInt(exponent); + numCoefficientBytes = 0; } - } - - BigInteger coefficient = value.unscaledValue(); - int numCoefficientBytes = WriteBuffer.flexIntLength(coefficient); - - int numExponentBytes = 0; - if (exponent != 0) { - numExponentBytes = WriteBuffer.fixedIntLength(exponent); + } else { + coefficientBytes = value.unscaledValue().toByteArray(); + numCoefficientBytes = coefficientBytes.length; } int opCodeAndLengthBytes = 1; - if (numExponentBytes + numCoefficientBytes < 15) { + if (numExponentBytes + numCoefficientBytes < 16) { int opCode = OpCodes.DECIMAL_ZERO_LENGTH + numExponentBytes + numCoefficientBytes; buffer.writeByte((byte) opCode); } else { - // Decimal values that require more than 14 bytes can be encoded using the variable-length decimal opcode: 0xF6. + // Decimal values that require more than 15 bytes can be encoded using the variable-length decimal opcode: 0xF6. buffer.writeByte(OpCodes.VARIABLE_LENGTH_DECIMAL); opCodeAndLengthBytes += buffer.writeFlexUInt(numExponentBytes + numCoefficientBytes); } - buffer.writeFlexInt(coefficient); - if (exponent != 0) { - buffer.writeFixedInt(exponent); + + buffer.writeFlexInt(exponent); + if (numCoefficientBytes > 0) { + if (coefficientBytes != null) { + buffer.writeFixedIntOrUInt(coefficientBytes); + } else if (numCoefficientBytes == 1){ + buffer.writeByte((byte) 0); + } else { + throw new IllegalStateException("Unreachable! coefficientBytes should not be null when numCoefficientBytes > 1"); + } } return opCodeAndLengthBytes + numCoefficientBytes + numExponentBytes; @@ -403,7 +407,6 @@ static int writeLongFormTimestampValue(WriteBuffer buffer, Timestamp value) { return 8; // OpCode + FlexUInt + 6 bytes data } - bits |= ((long) value.getSecond()) << L_TIMESTAMP_SECOND_BIT_OFFSET; int secondsScale = 0; if (value.getZFractionalSecond() != null) { @@ -416,20 +419,29 @@ static int writeLongFormTimestampValue(WriteBuffer buffer, Timestamp value) { } BigDecimal fractionalSeconds = value.getZFractionalSecond(); - BigInteger coefficient = fractionalSeconds.unscaledValue(); + long exponent = fractionalSeconds.scale(); - int numCoefficientBytes = WriteBuffer.flexUIntLength(coefficient); - int numExponentBytes = WriteBuffer.fixedUIntLength(exponent); + int numExponentBytes = WriteBuffer.flexUIntLength(exponent); + + BigInteger coefficient = fractionalSeconds.unscaledValue(); + byte[] coefficientBytes = null; + int numCoefficientBytes = 0; + if (!coefficient.equals(BigInteger.ZERO)) { + coefficientBytes = coefficient.toByteArray(); + numCoefficientBytes = coefficientBytes.length; + } + // Years-seconds data (7 bytes) + fraction coefficient + fraction exponent int dataLength = 7 + numCoefficientBytes + numExponentBytes; - buffer.writeFlexUInt(dataLength); + int numLengthBytes = buffer.writeFlexUInt(dataLength); buffer.writeFixedIntOrUInt(bits, 7); - buffer.writeFlexUInt(coefficient); - buffer.writeFixedUInt(exponent); - + buffer.writeFlexUInt(exponent); + if (coefficientBytes != null) { + buffer.writeFixedIntOrUInt(coefficientBytes); + } // OpCode + FlexUInt length + dataLength - return 1 + WriteBuffer.flexUIntLength(dataLength) + dataLength; + return 1 + numLengthBytes + dataLength; } /** diff --git a/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java b/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java index 2955147be7..dca22fd1c4 100644 --- a/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java +++ b/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java @@ -1516,6 +1516,18 @@ private int _writeFixedIntOrUInt(final long value, final int numBytes) { return numBytes; } + /** + * Writes a FixedInt or FixedUInt for an arbitrarily large integer that is represented + * as a byte array in which the most significant byte is the first in the array, and the least + * significant byte is the last in the array. + */ + public int writeFixedIntOrUInt(final byte[] value) { + for (int i = value.length - 1; i >= 0; i--) { + writeByte(value[i]); + } + return value.length; + } + /** Write the entire buffer to output stream. */ public void writeTo(final OutputStream out) throws IOException { diff --git a/src/test/java/com/amazon/ion/impl/bin/IonEncoder_1_1Test.java b/src/test/java/com/amazon/ion/impl/bin/IonEncoder_1_1Test.java index 0e316447e7..192a315056 100644 --- a/src/test/java/com/amazon/ion/impl/bin/IonEncoder_1_1Test.java +++ b/src/test/java/com/amazon/ion/impl/bin/IonEncoder_1_1Test.java @@ -237,47 +237,53 @@ public void testWriteFloatValueForDouble(double value, String expectedBytes) { @ParameterizedTest @CsvSource({ - " 0., 60", - " 0e1, 6F 03", - " 0e63, 6F 7F", - " 0e99, 6F 8E 01", - " 0.0, 6F FF", - " 0.00, 6F FD", - " 0.000, 6F FB", - " 0e-64, 6F 81", - " 0e-99, 6F 76 FE", - " -0., 61 01", - " -0e1, 62 01 01", - " -0e3, 62 01 03", - " -0e127, 62 01 7F", - " -0e199, 63 01 C7 00", - " -0e-1, 62 01 FF", - " -0e-2, 62 01 FE", - " -0e-3, 62 01 FD", - " -0e-127, 62 01 81", - " -0e-199, 63 01 39 FF", - " 0.01, 62 03 FE", - " 0.1, 62 03 FF", - " 1, 61 03", - " 1e1, 62 03 01", - " 1e2, 62 03 02", - " 1e127, 62 03 7F", - " 1e128, 63 03 80 00", - " 1e65536, 64 03 00 00 01", - " 2, 61 05", - " 7, 61 0F", - " 14, 61 1D", - " 1.0, 62 15 FF", - " 1.00, 63 92 01 FE", - " 1.27, 63 FE 01 FE", - " 3.142, 63 1A 31 FD", - " 3.14159, 64 7C 59 26 FB", - " 3.141593, 65 98 FD FE 02 FA", - " 3.141592653, 66 B0 C9 1C 68 17 F7", - " 3.14159265359, 67 E0 93 7D 56 49 12 F5", - " 3.1415926535897932, 69 80 4C 43 76 65 9E 9C 6F F0", - " 3.1415926535897932384626434, 6E 00 50 E0 DC F7 CC D6 08 48 99 92 3F 03 E7", - "3.141592653589793238462643383, F6 1F 00 E0 2D 8F A4 21 D0 E7 46 C0 87 AA 89 02 E5", + " 0., 60", + " 0e1, 61 03", + " 0e63, 61 7F", + " 0e64, 62 02 01", + " 0e99, 62 8E 01", + " 0.0, 61 FF", + " 0.00, 61 FD", + " 0.000, 61 FB", + " 0e-64, 61 81", + " 0e-99, 62 76 FE", + " -0., 62 01 00", + " -0e1, 62 03 00", + " -0e3, 62 07 00", + " -0e63, 62 7F 00", + " -0e199, 63 1E 03 00", + " -0e-1, 62 FF 00", + " -0e-2, 62 FD 00", + " -0e-3, 62 FB 00", + " -0e-63, 62 83 00", + " -0e-64, 62 81 00", + " -0e-65, 63 FE FE 00", + " -0e-199, 63 E6 FC 00", + " 0.01, 62 FD 01", + " 0.1, 62 FF 01", + " 1, 62 01 01", + " 1e1, 62 03 01", + " 1e2, 62 05 01", + " 1e63, 62 7F 01", + " 1e64, 63 02 01 01", + " 1e65536, 64 04 00 08 01", + " 2, 62 01 02", + " 7, 62 01 07", + " 14, 62 01 0E", + " 1.0, 62 FF 0A", + " 1.00, 62 FD 64", + " 1.27, 62 FD 7F", + " 1.28, 63 FD 80 00", + " 3.142, 63 FB 46 0C", + " 3.14159, 64 F7 2F CB 04", + " 3.1415927, 65 F3 77 5E DF 01", + " 3.141592653, 66 EF 4D E6 40 BB 00", + " 3.141592653590, 67 E9 16 9F 83 75 DB 02", + " 3.14159265358979323, 69 DF FB A0 9E F6 2F 1E 5C 04", + " 3.1415926535897932384626, 6B D5 72 49 64 CC AF EF 8F 0F A7 06", + " 3.141592653589793238462643383, 6D CB B7 3C 92 86 40 9F 1B 01 1F AA 26 0A", + " 3.14159265358979323846264338327950, 6F C1 8E 29 E5 E3 56 D5 DF C5 10 8F 55 3F 7D 0F", + "3.141592653589793238462643383279503, F6 21 BF 8F 9F F3 E6 64 55 BE BA A7 96 57 79 E4 9A 00", }) public void testWriteDecimalValue(@ConvertWith(StringToDecimal.class) Decimal value, String expectedBytes) { assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeDecimalValue); @@ -383,7 +389,7 @@ public void testWriteTimestampValueWithKnownOffsetShortForm(@ConvertWith(StringT @ParameterizedTest @CsvSource({ - // OpCode Length YYYYYYYY MMYYYYYY HDDDDDMM mmmmHHHH oooooomm ssoooooo ....ssss Coefficient+ Scale + // OpCode Length YYYYYYYY MMYYYYYY HDDDDDMM mmmmHHHH oooooomm ssoooooo ....ssss Scale+ Coefficient "0001T, 11110111 00000101 00000001 00000000", "1947T, 11110111 00000101 10011011 00000111", "9999T, 11110111 00000101 00001111 00100111", @@ -396,38 +402,37 @@ public void testWriteTimestampValueWithKnownOffsetShortForm(@ConvertWith(StringT "1947-12-23T23:59Z, 11110111 00001101 10011011 00000111 11011111 10111011 10000011 00010110", "1947-12-23T23:59:00Z, 11110111 00001111 10011011 00000111 11011111 10111011 10000011 00010110 00000000", "1947-12-23T23:59:59Z, 11110111 00001111 10011011 00000111 11011111 10111011 10000011 11010110 00001110", - "1947-12-23T23:59:00.0Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000001", - "1947-12-23T23:59:00.00Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000010", - "1947-12-23T23:59:00.000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000011", - "1947-12-23T23:59:00.0000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000100", - "1947-12-23T23:59:00.00000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000101", - "1947-12-23T23:59:00.000000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000110", - "1947-12-23T23:59:00.0000000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000111", - "1947-12-23T23:59:00.00000000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00001000", - "1947-12-23T23:59:00.9Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00010011 00000001", - "1947-12-23T23:59:00.99Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11000111 00000010", - "1947-12-23T23:59:00.999Z, 11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 10011110 00001111 00000011", - "1947-12-23T23:59:00.9999Z, 11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00111110 10011100 00000100", - "1947-12-23T23:59:00.99999Z, 11110111 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111100 00110100 00001100 00000101", - "1947-12-23T23:59:00.999999Z, 11110111 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111100 00010001 01111010 00000110", - "1947-12-23T23:59:00.9999999Z, 11110111 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111000 01100111 10001001 00001001 00000111", - "1947-12-23T23:59:00.99999999Z, 11110111 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111000 00001111 01011110 01011111 00001000", + "1947-12-23T23:59:00.0Z, 11110111 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000011", + "1947-12-23T23:59:00.00Z, 11110111 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000101", + "1947-12-23T23:59:00.000Z, 11110111 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000111", + "1947-12-23T23:59:00.0000Z, 11110111 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001001", + "1947-12-23T23:59:00.00000Z, 11110111 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001011", + "1947-12-23T23:59:00.000000Z, 11110111 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001101", + "1947-12-23T23:59:00.0000000Z, 11110111 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001111", + "1947-12-23T23:59:00.00000000Z, 11110111 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00010001", + "1947-12-23T23:59:00.9Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000011 00001001", + "1947-12-23T23:59:00.99Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000101 01100011", + "1947-12-23T23:59:00.999Z, 11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000111 11100111 00000011", + "1947-12-23T23:59:00.9999Z, 11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001001 00001111 00100111", + "1947-12-23T23:59:00.99999Z, 11110111 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001011 10011111 10000110 00000001", + "1947-12-23T23:59:00.999999Z, 11110111 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001101 00111111 01000010 00001111", + "1947-12-23T23:59:00.9999999Z, 11110111 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001111 01111111 10010110 10011000 00000000", + "1947-12-23T23:59:00.99999999Z, 11110111 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00010001 11111111 11100000 11110101 00000101", "1947-12-23T23:59:00.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000Z, " + - "11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 10001101", + "11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00110110 00000010", "1947-12-23T23:59:00.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000Z, " + - "11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 01101000 00000001", + "11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 10100010 00000101", "1947-12-23T23:59:00.999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999Z, " + - "11110111 10010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 " + - "11111100 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 " + - "11111111 10010100 10001001 01111001 01101100 11001110 01111000 11110010 01000000 01111101 10100110 11000111 10101000 01000110 01011001 01110001 01001101 " + - "00100000 11110101 01101110 01111010 00001100 00001001 11101111 01111111 11110011 00011110 00010100 11010111 01101000 01110111 10101100 01101100 10001110 " + - "00110010 10110111 10000010 11110010 00110110 01101000 11110010 10100111 10001101", - + "11110111 10001001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00110110 00000010 11111111 11111111 11111111 11111111 11111111 11111111 " + + "11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 10011111 00110010 00110001 10001111 11001101 00011001 " + + "01001111 00011110 10101000 11001111 11110100 00011000 11010101 00101000 00101011 10101110 00001001 10100100 11011110 01001101 10001111 00100001 11100001 " + + "11111101 01101111 11011110 10000011 11100010 00011010 11101101 10001110 10010101 11001101 01010001 11100110 01010110 01010000 11011110 00000110 01001101 " + + "11111110 00010100", // Offsets "2048-01-01T01:01-23:59, 11110111 00001101 00000000 01001000 10000100 00010000 00000100 00000000", @@ -456,8 +461,8 @@ public void testWriteTimestampValueLongForm(@ConvertWith(StringToTimestamp.class // Long form because the offset is not a multiple of 15 "2048-01-01T01:01+00:01, 11110111 00001101 00000000 01001000 10000100 00010000 10000100 00010110", - // Long form because the fractional seconds are millis, micros, or nanos - "2023-12-31T23:59:00.0Z, 11110111 00010011 11100111 00000111 11111111 10111011 10000011 00010110 00000000 00000001 00000001", + // Long form because the fractional seconds are not millis, micros, or nanos + "2023-12-31T23:59:00.0Z, 11110111 00010001 11100111 00000111 11111111 10111011 10000011 00010110 00000000 00000011", }) public void testWriteTimestampDelegatesCorrectlyToLongForm(@ConvertWith(StringToTimestamp.class) Timestamp value, String expectedBytes) { assertWritingValueWithBinary(expectedBytes, value, IonEncoder_1_1::writeTimestampValue);