From 8097f3c8cb7ca525a1ed76f4101cb79409cd7631 Mon Sep 17 00:00:00 2001 From: Matthew Pope Date: Wed, 8 May 2024 13:17:24 -0700 Subject: [PATCH] Fixes Ion 1.1 symbols for opcodes E1-E3 --- .../com/amazon/ion/impl/IonCursorBinary.java | 61 ++++++++++++++++++- .../impl/IonReaderContinuableCoreBinary.java | 25 ++++++-- .../ion/impl/IonCursorTestUtilities.java | 16 ++++- .../IonReaderContinuableCoreBinaryTest.java | 60 ++++++++++++------ 4 files changed, 137 insertions(+), 25 deletions(-) diff --git a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java index 1f09e7d14b..7f0f999d85 100644 --- a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java @@ -1050,6 +1050,27 @@ private long uncheckedReadFlexUInt_1_1() { return uncheckedReadLargeFlexUInt_1_1(currentByte); } + /** + * Reads the length of a FlexUInt (or FlexInt) at the given position. + * Does not alter the state of the peekIndex or anything else. + * @return the number of bytes used to encode the FlexUInt (or FlexInt) that starts a "position" + */ + private long uncheckedReadLengthOfFlexUInt_1_1(long position) { + int length = 1; + while (true) { + int numZeros = Integer.numberOfTrailingZeros(buffer[(int) position]); + if (numZeros < 8) { + length += numZeros; + return length; + } else { + // We don't actually know the length without looking at even more bytes, + // so look at another. + length += 8; + position++; + } + } + } + /** * Reads a multi-byte FlexUInt into a long, ensuring enough data is available in the buffer. After this method * returns, `peekIndex` points to the first byte after the end of the FlexUInt. @@ -1089,6 +1110,31 @@ private long slowReadFlexUInt_1_1() { return slowReadLargeFlexUInt_1_1(currentByte); } + /** + * Reads the length of a FlexUInt (or FlexInt) at the given position. + * Does not alter the state of the peekIndex. May fill data, if needed. + * @return the number of bytes used to encode the FlexUInt (or FlexInt) that starts a "position" + * or -1 if the end of the stream has been reached + */ + private long slowReadLengthOfFlexUInt_1_1(long position) { + int length = 1; + while (true) { + if (!fillAt(position, 0)) { + return -1; + } + int numZeros = Integer.numberOfTrailingZeros(buffer[(int) position]); + if (numZeros < 8) { + length += numZeros; + return length; + } else { + // We don't actually know the length without looking at even more bytes, + // so add 8 to length, and then look at the next byte. + length += 8; + position++; + } + } + } + /** * Reads the header of an Ion 1.1 annotation wrapper. This must only be called when it is known that the buffer * already contains all the bytes in the header. Sets `valueMarker` with the start and end indices of the wrapped @@ -1232,7 +1278,14 @@ private long calculateEndIndex_1_1(IonTypeID valueTid, boolean isAnnotated) { event = Event.START_CONTAINER; return DELIMITED_MARKER; } - long endIndex = (valueTid.variableLength ? uncheckedReadFlexUInt_1_1() : valueTid.length) + peekIndex; + long length = valueTid.length; + if (valueTid.variableLength) { + length = uncheckedReadFlexUInt_1_1(); + } else if (length < 0) { + // The value is a FlexInt or FlexUInt, so read the continuation bits to determine the length. + length = uncheckedReadLengthOfFlexUInt_1_1(peekIndex); + } + long endIndex = peekIndex + length; if (valueTid.type != null && valueTid.type.ordinal() >= LIST_TYPE_ORDINAL) { event = Event.START_CONTAINER; } else if (valueTid.isNopPad) { @@ -2008,6 +2061,12 @@ private boolean slowReadValueHeader(IonTypeID valueTid, boolean isAnnotated, Mar if (valueLength < 0) { return true; } + } else if (valueTid.length < 0 && minorVersion > 0) { + // The value is itself a FlexInt or FlexUInt, so read the continuation bits to determine the length. + valueLength = slowReadLengthOfFlexUInt_1_1(peekIndex); + if (valueLength < 0) { + return true; + } } else { valueLength = valueTid.length; } diff --git a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java index 868c810801..378a7f0435 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java @@ -712,7 +712,7 @@ private BigDecimal readTimestampFraction_1_1() { } value = new BigDecimal(readLargeFixedIntOrFixedUIntAsBigInteger(length), scale); } else if (length > 0) { - value = BigDecimal.valueOf(readFixedUInt_1_1(), scale); + value = BigDecimal.valueOf(readFixedUInt_1_1(peekIndex, valueMarker.endIndex), scale); } else { value = BigDecimal.valueOf(0, scale); } @@ -946,13 +946,13 @@ private long readUInt(long startIndex, long endIndex) { } /** - * Reads a FixedUInt (little-endian), starting at `peekIndex` and ending at `valueMarker.endIndex`. + * Reads a FixedUInt (little-endian), for the range of bytes given by `startInclusive` and `endExclusive`. * @return the value. */ - private long readFixedUInt_1_1() { + private long readFixedUInt_1_1(long startInclusive, long endExclusive) { long result = 0; - for (int i = (int) peekIndex; i < valueMarker.endIndex; i++) { - result |= ((long) (buffer[i] & SINGLE_BYTE_MASK) << ((i - peekIndex) * VALUE_BITS_PER_UINT_BYTE)); + for (int i = (int) startInclusive; i < endExclusive; i++) { + result |= ((long) (buffer[i] & SINGLE_BYTE_MASK) << ((i - startInclusive) * VALUE_BITS_PER_UINT_BYTE)); } return result; } @@ -1306,7 +1306,20 @@ public int symbolValueId() { return -1; } prepareScalar(); - return (int) readUInt(valueMarker.startIndex, valueMarker.endIndex); + if (minorVersion == 0) { + return (int) readUInt(valueMarker.startIndex, valueMarker.endIndex); + } else { + if (valueTid.length == 1){ + return (int) readFixedUInt_1_1(valueMarker.startIndex, valueMarker.endIndex); + } else if (valueTid.length == 2){ + return (int) readFixedUInt_1_1(valueMarker.startIndex, valueMarker.endIndex) + 256; + } else if (valueTid.length == -1) { + peekIndex = valueMarker.startIndex; + return (int) readFlexUInt_1_1() + 65792; + } else { + throw new IllegalStateException("Illegal length " + valueTid.length + " for " + valueMarker); + } + } } /** diff --git a/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java b/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java index 69c009b913..09b7e0105b 100644 --- a/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java +++ b/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java @@ -1,6 +1,5 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 - package com.amazon.ion.impl; import com.amazon.ion.IonBufferConfiguration; @@ -209,6 +208,21 @@ static ExpectationProvider )); } + /** + * Provides Expectations that verify that advancing the cursor to the next value positions the cursor on a scalar + * with type symbol and the given expected value. + */ + static ExpectationProvider fillSymbolValue(int expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("symbol($%s)", expectedValue), + reader -> { + assertEquals(VALUE_READY, reader.fillValue()); + assertEquals(IonType.SYMBOL, reader.getType()); + assertEquals(expectedValue, reader.symbolValueId()); + } + )); + } + /** * Provides an Expectation that verifies that advancing the cursor positions it on a container value, without * filling that container. diff --git a/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java index 2d81d09974..491aceb89f 100644 --- a/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java @@ -1,32 +1,20 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 - package com.amazon.ion.impl; import com.amazon.ion.IonCursor; import com.amazon.ion.IonException; import com.amazon.ion.IonType; +import com.amazon.ion.TestUtils; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.ValueSource; import java.io.ByteArrayInputStream; import static com.amazon.ion.BitUtils.bytes; -import static com.amazon.ion.impl.IonCursorTestUtilities.STANDARD_BUFFER_CONFIGURATION; -import static com.amazon.ion.impl.IonCursorTestUtilities.Expectation; -import static com.amazon.ion.impl.IonCursorTestUtilities.ExpectationProvider; -import static com.amazon.ion.impl.IonCursorTestUtilities.assertSequence; -import static com.amazon.ion.impl.IonCursorTestUtilities.container; -import static com.amazon.ion.impl.IonCursorTestUtilities.container; -import static com.amazon.ion.impl.IonCursorTestUtilities.endContainer; -import static com.amazon.ion.impl.IonCursorTestUtilities.endStream; -import static com.amazon.ion.impl.IonCursorTestUtilities.fillContainer; -import static com.amazon.ion.impl.IonCursorTestUtilities.fillIntValue; -import static com.amazon.ion.impl.IonCursorTestUtilities.scalar; -import static com.amazon.ion.impl.IonCursorTestUtilities.scalar; -import static com.amazon.ion.impl.IonCursorTestUtilities.startContainer; -import static com.amazon.ion.impl.IonCursorTestUtilities.fillStringValue; +import static com.amazon.ion.impl.IonCursorTestUtilities.*; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -34,13 +22,17 @@ public class IonReaderContinuableCoreBinaryTest { private IonReaderContinuableCoreBinary initializeReader(boolean constructFromBytes, int... data) { + return initializeReader(constructFromBytes, bytes(data)); + } + + private IonReaderContinuableCoreBinary initializeReader(boolean constructFromBytes, byte[] data) { IonReaderContinuableCoreBinary reader; if (constructFromBytes) { - reader = new IonReaderContinuableCoreBinary(STANDARD_BUFFER_CONFIGURATION, bytes(data), 0, data.length); + reader = new IonReaderContinuableCoreBinary(STANDARD_BUFFER_CONFIGURATION, data, 0, data.length); } else { reader = new IonReaderContinuableCoreBinary( STANDARD_BUFFER_CONFIGURATION, - new ByteArrayInputStream(bytes(data)), + new ByteArrayInputStream(data), null, 0, 0 @@ -119,6 +111,40 @@ public void basicStrings(boolean constructFromBytes) { ); } + @ParameterizedTest(name = "constructFromBytes={0}") + @CsvSource({ + "0, E1 00 60", + "1, E1 01 60", + "255, E1 FF 60", + "256, E2 00 00 60", + "257, E2 01 00 60", + "512, E2 00 01 60", + "513, E2 01 01 60", + "65535, E2 FF FE 60", + "65791, E2 FF FF 60", + "65792, E3 01 60", + "65793, E3 03 60", + "65919, E3 FF 60", + "65920, E3 02 02 60", + "2147483647 , E3 F0 DF DF FF 0F 60", + }) + public void sidSymbols_1_1(int sid, String bytes) { + sidSymbols_1_1_helper(sid, bytes, true); + sidSymbols_1_1_helper(sid, bytes, false); + } + void sidSymbols_1_1_helper(int sid, String bytes, boolean constructFromBytes) { + IonReaderContinuableCoreBinary reader = initializeReader( + constructFromBytes, + TestUtils.hexStringToByteArray("E0 01 01 EA " + bytes) + ); + assertSequence( + reader, + scalar(), fillSymbolValue(sid), + scalar(), fillIntValue(0), + endStream() + ); + } + @ParameterizedTest(name = "constructFromBytes={0}") @ValueSource(booleans = {true, false})