From 5ddd95af0082013a15ab086512e0c139c7aa8699 Mon Sep 17 00:00:00 2001 From: Tyler Gregg Date: Fri, 20 Oct 2023 13:45:35 -0700 Subject: [PATCH 1/4] Makes the binary reader set the encoding version when seeked to a span. --- src/com/amazon/ion/impl/IonCursorBinary.java | 12 +++++++++++- .../IonReaderContinuableTopLevelBinary.java | 2 +- .../ion/streaming/SeekableReaderTest.java | 17 +++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/com/amazon/ion/impl/IonCursorBinary.java b/src/com/amazon/ion/impl/IonCursorBinary.java index 5308dffc7e..ed1d9defe7 100644 --- a/src/com/amazon/ion/impl/IonCursorBinary.java +++ b/src/com/amazon/ion/impl/IonCursorBinary.java @@ -9,6 +9,7 @@ import com.amazon.ion.IonCursor; import com.amazon.ion.IonType; import com.amazon.ion.IvmNotificationConsumer; +import com.amazon.ion.SystemSymbols; import java.io.ByteArrayInputStream; import java.io.EOFException; @@ -1786,8 +1787,9 @@ Marker getValueMarker() { * can be used to seek the reader to a "span" of bytes that represent a value in the stream. * @param offset the offset at which the slice will begin. * @param limit the slice's limit. + * @param ionVersionId the Ion version ID for the slice, e.g. $ion_1_0 for Ion 1.0. */ - void slice(long offset, long limit) { + void slice(long offset, long limit, String ionVersionId) { peekIndex = offset; this.limit = limit; setCheckpointBeforeUnannotatedTypeId(); @@ -1795,6 +1797,14 @@ void slice(long offset, long limit) { event = Event.NEEDS_DATA; valueTid = null; containerIndex = -1; // Slices are treated as if they were at the top level. + if (SystemSymbols.ION_1_0.equals(ionVersionId)) { + typeIds = IonTypeID.TYPE_IDS_1_0; + majorVersion = 1; + minorVersion = 0; + } else { + // TODO changes are needed here to support Ion 1.1. + throw new IonException(String.format("Attempted to seek using an unsupported Ion version %s.", ionVersionId)); + } } /** diff --git a/src/com/amazon/ion/impl/IonReaderContinuableTopLevelBinary.java b/src/com/amazon/ion/impl/IonReaderContinuableTopLevelBinary.java index 8e06fafe14..25a9383f8e 100644 --- a/src/com/amazon/ion/impl/IonReaderContinuableTopLevelBinary.java +++ b/src/com/amazon/ion/impl/IonReaderContinuableTopLevelBinary.java @@ -308,8 +308,8 @@ public void hoist(Span span) { // of the value to be the end of the stream, in order to comply with the SeekableReader contract. From // an implementation perspective, this is not necessary; if we leave the buffer's limit unchanged, the // reader can continue after processing the hoisted value. - slice(binarySpan.bufferOffset, binarySpan.bufferLimit); restoreSymbolTable(binarySpan.symbolTable); + slice(binarySpan.bufferOffset, binarySpan.bufferLimit, binarySpan.symbolTable.getIonVersionId()); type = null; } } diff --git a/test/com/amazon/ion/streaming/SeekableReaderTest.java b/test/com/amazon/ion/streaming/SeekableReaderTest.java index 5510068edc..066249d386 100644 --- a/test/com/amazon/ion/streaming/SeekableReaderTest.java +++ b/test/com/amazon/ion/streaming/SeekableReaderTest.java @@ -19,6 +19,7 @@ import com.amazon.ion.IonType; import com.amazon.ion.IonWriter; import com.amazon.ion.ReaderMaker; +import com.amazon.ion.SeekableReader; import com.amazon.ion.Span; import com.amazon.ion.TestUtils; import com.amazon.ion.impl._Private_Utils; @@ -327,6 +328,22 @@ public void testHoistingAcrossSymbolTableBoundary() expectTopEof(); } + @Test + public void testHoistingFromSpanCreatedByDifferentReaderBeforeNext() + { + read("foo bar"); + in.next(); + in.next(); + Span barSpan = sr.currentSpan(); + + read("foo bar"); // Creates a new reader + initFacets(); + + hoist(barSpan); + assertSame(IonType.SYMBOL, in.next()); + assertEquals("bar", in.stringValue()); + } + //======================================================================== // Failure cases From 72c196236c4dea7079ccdfb389c2f987c286c8a5 Mon Sep 17 00:00:00 2001 From: Tyler Gregg Date: Mon, 23 Oct 2023 18:02:19 -0700 Subject: [PATCH 2/4] Handles the case where the binary cursor's InputStream provides fewer bytes than requested before reaching EOF. --- src/com/amazon/ion/impl/IonCursorBinary.java | 43 +++++---- ...onReaderContinuableTopLevelBinaryTest.java | 88 +++++++++++++++++++ 2 files changed, 114 insertions(+), 17 deletions(-) diff --git a/src/com/amazon/ion/impl/IonCursorBinary.java b/src/com/amazon/ion/impl/IonCursorBinary.java index ed1d9defe7..0e3ee6c5b2 100644 --- a/src/com/amazon/ion/impl/IonCursorBinary.java +++ b/src/com/amazon/ion/impl/IonCursorBinary.java @@ -524,8 +524,7 @@ private boolean fillAt(long index, long numberOfBytes) { refillableState.bytesRequested = numberOfBytes + (index - offset); if (ensureCapacity(refillableState.bytesRequested)) { // Fill all the free space, not just the shortfall; this reduces I/O. - refill(freeSpaceAt(limit)); - shortfall = refillableState.bytesRequested - availableAt(offset); + shortfall = refill(freeSpaceAt(limit), refillableState.bytesRequested); } else { // The request cannot be satisfied, but not because data was unavailable. Return normally; it is the // caller's responsibility to recover. @@ -646,24 +645,34 @@ private void shiftIndicesLeft(int shiftAmount) { } /** - * Fills the buffer with up to the requested number of additional bytes. It is the caller's responsibility to - * ensure that there is space in the buffer. + * Attempts to fill the buffer with up to the requested number of additional bytes. It is the caller's + * responsibility to ensure that there is space in the buffer. * @param numberOfBytesToFill the number of additional bytes to attempt to add to the buffer. + * @param minimumNumberOfBytesRequired the minimum number of bytes requested to fill the current value. + * @return the shortfall between the number of bytes that were filled and the minimum number requested. If less than + * 1, then at least `minimumNumberOfBytesRequired` were filled. */ - private void refill(long numberOfBytesToFill) { + private long refill(long numberOfBytesToFill, long minimumNumberOfBytesRequired) { int numberOfBytesFilled = -1; - try { - numberOfBytesFilled = refillableState.inputStream.read(buffer, (int) limit, (int) numberOfBytesToFill); - } catch (EOFException e) { - // Certain InputStream implementations (e.g. GZIPInputStream) throw EOFException if more bytes are requested - // to read than are currently available (e.g. if a header or trailer is incomplete). - } catch (IOException e) { - throwAsIonException(e); - } - if (numberOfBytesFilled < 0) { - return; - } - limit += numberOfBytesFilled; + long shortfall; + // Sometimes an InputStream implementation will return fewer than the number of bytes requested even + // if the stream is not at EOF. If this happens and there is still a shortfall, keep requesting bytes + // until either the shortfall is filled or EOF is reached. + do { + try { + numberOfBytesFilled = refillableState.inputStream.read(buffer, (int) limit, (int) numberOfBytesToFill); + } catch (EOFException e) { + // Certain InputStream implementations (e.g. GZIPInputStream) throw EOFException if more bytes are requested + // to read than are currently available (e.g. if a header or trailer is incomplete). + } catch (IOException e) { + throwAsIonException(e); + } + if (numberOfBytesFilled > 0) { + limit += numberOfBytesFilled; + } + shortfall = minimumNumberOfBytesRequired - availableAt(offset); + } while (shortfall > 0 && numberOfBytesFilled >= 0); + return shortfall; } /** diff --git a/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java b/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java index ff41c7a785..46aab05d2b 100644 --- a/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java +++ b/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java @@ -3196,6 +3196,94 @@ public void skipWithoutEnoughDataNonIncrementalFails() throws Exception { reader.close(); } + /** + * An InputStream that returns less than the number of bytes requested from bulk reads. + */ + private static class ThrottlingInputStream extends InputStream { + + private final byte[] data; + private int offset = 0; + + protected ThrottlingInputStream(byte[] data) { + this.data = data; + } + + @Override + public int read() { + return data[offset++] & 0xFF; + } + + private int calculateNumberOfBytesToReturn(int numberOfBytesRequested) { + int available = data.length - offset; + int numberOfBytesToReturn; + if (available > 1 && numberOfBytesRequested > 1) { + // Return fewer bytes than requested and fewer than are available, avoiding EOF. + numberOfBytesToReturn = Math.min(available - 1, numberOfBytesRequested - 1); + } else if (available <= 0) { + return -1; // EOF + } else { + // Only 1 byte is available, so return it as long as at least 1 byte was requested. + numberOfBytesToReturn = Math.min(numberOfBytesRequested, available); + } + return numberOfBytesToReturn; + } + + @Override + public int read(byte[] b, int off, int len) { + int numberOfBytesToReturn = calculateNumberOfBytesToReturn(len); + if (numberOfBytesToReturn < 0) { + return -1; + } + System.arraycopy(data, offset, b, off, numberOfBytesToReturn); + offset += numberOfBytesToReturn; + return numberOfBytesToReturn; + } + + @Override + public long skip(long len) { + int numberOfBytesToSkip = calculateNumberOfBytesToReturn((int) len); + offset += numberOfBytesToSkip; + return numberOfBytesToSkip; + } + } + + @ParameterizedTest(name = "incrementalReadingEnabled={0}") + @ValueSource(booleans = {true, false}) + public void shouldNotFailWhenAnInputStreamProvidesFewerBytesThanRequestedWithoutReachingEof(boolean incrementalReadingEnabled) throws Exception { + readerBuilder = readerBuilder.withIncrementalReadingEnabled(incrementalReadingEnabled) + .withBufferConfiguration(IonBufferConfiguration.Builder.standard().withInitialBufferSize(8).build()); + reader = readerFor(new ThrottlingInputStream(bytes(0xE0, 0x01, 0x00, 0xEA, 0x89, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'))); + assertSequence( + next(IonType.STRING), stringValue("abcdefghi"), + next(null) + ); + reader.close(); + } + + @Test + public void shouldNotFailWhenAnInputStreamProvidesFewerBytesThanRequestedWithoutReachingEofAndTheReaderSkipsTheValue() throws Exception { + reader = boundedReaderFor(new ThrottlingInputStream(bytes(0xE0, 0x01, 0x00, 0xEA, 0x89, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 0x20)), 8, 8, byteAndOversizedValueCountingHandler); + assertSequence( + next(IonType.INT), intValue(0), + next(null) + ); + reader.close(); + assertEquals(1, oversizedCounter.get()); + } + + @Test + public void shouldNotFailWhenGZIPBoundaryIsEncounteredInStringValue() throws Exception { + ResizingPipedInputStream pipe = new ResizingPipedInputStream(128); + // The following lines create a GZIP payload boundary (trailer/header) in the middle of an Ion string value. + pipe.receive(gzippedBytes(0xE0, 0x01, 0x00, 0xEA, 0x89, 'a', 'b')); + pipe.receive(gzippedBytes('c', 'd', 'e', 'f', 'g', 'h', 'i')); + reader = readerFor(new GZIPInputStream(pipe)); + assertSequence( + next(IonType.STRING), stringValue("abcdefghi"), + next(null) + ); + } + @Test public void concatenatedAfterGZIPHeader() throws Exception { // Tests that a stream that initially contains only a GZIP header can be read successfully if more data From 816e283f8a99c4fb878a9c760a4dcdb1b6eb6100 Mon Sep 17 00:00:00 2001 From: Tyler Gregg Date: Tue, 24 Oct 2023 16:00:57 -0700 Subject: [PATCH 3/4] Ensures that the binary reader can handle a ByteArrayInputStream that returns a negative number from available(). --- src/com/amazon/ion/impl/IonCursorBinary.java | 5 ++++- .../impl/IonReaderContinuableTopLevelBinaryTest.java | 11 +++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/com/amazon/ion/impl/IonCursorBinary.java b/src/com/amazon/ion/impl/IonCursorBinary.java index 0e3ee6c5b2..2548dd0372 100644 --- a/src/com/amazon/ion/impl/IonCursorBinary.java +++ b/src/com/amazon/ion/impl/IonCursorBinary.java @@ -389,7 +389,10 @@ private static IonBufferConfiguration getFixedSizeConfigurationFor( ByteArrayInputStream inputStream, int alreadyReadLen ) { - int fixedBufferSize = inputStream.available(); + // Note: ByteArrayInputStream.available() can return a negative number because its constructor does + // not validate that the offset and length provided are actually within range of the provided byte array. + // Setting the result to 0 in this case avoids an error when looking up the fixed sized configuration. + int fixedBufferSize = Math.max(0, inputStream.available()); if (alreadyReadLen > 0) { fixedBufferSize += alreadyReadLen; } diff --git a/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java b/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java index 46aab05d2b..0543df8bb3 100644 --- a/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java +++ b/test/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java @@ -3348,6 +3348,17 @@ public void concatenatedAfterMissingGZIPTrailer() throws Exception { assertEquals(1, oversizedCounter.get()); } + @Test + public void shouldNotFailWhenProvidedWithAnEmptyByteArrayInputStream() throws Exception { + reader = IonReaderBuilder.standard().build(new ByteArrayInputStream(new byte[]{})); + assertSequence(next(null)); + reader.close(); + // The following ByteArrayInputStream is weird, but not disallowed. Its available() method will return -1. + reader = IonReaderBuilder.standard().build(new ByteArrayInputStream(new byte[]{}, 1, 1)); + assertSequence(next(null)); + reader.close(); + } + @ParameterizedTest(name = "constructFromBytes={0}") @ValueSource(booleans = {true, false}) public void incompleteContainerNonContinuable(boolean constructFromBytes) throws Exception { From 390c13e2e062ac0ab8ba63f69ceec056548b07fc Mon Sep 17 00:00:00 2001 From: Tyler Gregg Date: Fri, 27 Oct 2023 15:32:22 -0700 Subject: [PATCH 4/4] Accurately classifies all int values by size, rather than conservatively classifying all 4-byte ints as Java longs. --- .../impl/IonReaderContinuableCoreBinary.java | 49 +++++++++---------- .../ion/streaming/ReaderIntegerSizeTest.java | 10 ++-- 2 files changed, 26 insertions(+), 33 deletions(-) diff --git a/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java b/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java index 862520d14f..811281e64c 100644 --- a/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java +++ b/src/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java @@ -51,11 +51,13 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade // The number of bytes occupied by a Java long. private static final int LONG_SIZE_IN_BYTES = 8; - // The smallest negative 8-byte integer that can fit in a long is -0x80_00_00_00_00_00_00_00. - private static final int MOST_SIGNIFICANT_BYTE_OF_MIN_LONG = 0x80; + // The smallest negative 8-byte integer that can fit in a long is 0x80_00_00_00_00_00_00_00 and the smallest + // negative 4-byte integer that can fit in an int is 0x80_00_00_00. + private static final int MOST_SIGNIFICANT_BYTE_OF_MIN_INTEGER = 0x80; - // The largest positive 8-byte integer that can fit in a long is 0x7F_FF_FF_FF_FF_FF_FF_FF. - private static final int MOST_SIGNIFICANT_BYTE_OF_MAX_LONG = 0x7F; + // The largest positive 8-byte integer that can fit in a long is 0x7F_FF_FF_FF_FF_FF_FF_FF and the largest positive + // 4-byte integer that can fit in an int is 0x7F_FF_FF_FF. + private static final int MOST_SIGNIFICANT_BYTE_OF_MAX_INTEGER = 0x7F; // The second-most significant bit in the most significant byte of a VarInt is the sign. private static final int VAR_INT_SIGN_BITMASK = 0x40; @@ -418,34 +420,30 @@ private boolean readBoolean_1_0() { } /** - * Determines whether the 8-byte integer starting at `valueMarker.startIndex` and ending at `valueMarker.endIndex` - * fits in a long, or requires a BigInteger. - * @return either `IntegerSize.LONG` or `IntegerSize.BIG_INTEGER`. + * Determines whether the integer starting at `valueMarker.startIndex` and ending at `valueMarker.endIndex` + * crosses a type boundary. Callers must only invoke this method when the integer's size is known to be either + * 4 or 8 bytes. + * @return true if the value fits in the Java integer type that matches its Ion serialized size; false if it + * requires the next larger size. */ - private IntegerSize classifyEightByteInt_1_0() { + private boolean classifyInteger_1_0() { if (valueTid.isNegativeInt) { - // The smallest negative 8-byte integer that can fit in a long is -0x80_00_00_00_00_00_00_00. int firstByte = buffer[(int)(valueMarker.startIndex)] & SINGLE_BYTE_MASK; - if (firstByte < MOST_SIGNIFICANT_BYTE_OF_MIN_LONG) { - return IntegerSize.LONG; - } else if (firstByte > MOST_SIGNIFICANT_BYTE_OF_MIN_LONG) { - return IntegerSize.BIG_INTEGER; + if (firstByte < MOST_SIGNIFICANT_BYTE_OF_MIN_INTEGER) { + return true; + } else if (firstByte > MOST_SIGNIFICANT_BYTE_OF_MIN_INTEGER) { + return false; } for (long i = valueMarker.startIndex + 1; i < valueMarker.endIndex; i++) { if (0x00 != buffer[(int)(i)]) { - return IntegerSize.BIG_INTEGER; + return false; } } - } else { - // The largest positive 8-byte integer that can fit in a long is 0x7F_FF_FF_FF_FF_FF_FF_FF. - if ((buffer[(int)(valueMarker.startIndex)] & SINGLE_BYTE_MASK) > MOST_SIGNIFICANT_BYTE_OF_MAX_LONG) { - return IntegerSize.BIG_INTEGER; - } + return true; } - return IntegerSize.LONG; + return (buffer[(int) (valueMarker.startIndex)] & SINGLE_BYTE_MASK) <= MOST_SIGNIFICANT_BYTE_OF_MAX_INTEGER; } - int readVarUInt_1_1() { throw new UnsupportedOperationException(); } @@ -536,16 +534,13 @@ public IntegerSize getIntegerSize() { if (valueTid.length < 0) { return IntegerSize.BIG_INTEGER; } else if (valueTid.length < INT_SIZE_IN_BYTES) { - // Note: this is conservative. Most integers of size 4 also fit in an int, but since exactly the - // same parsing code is used for ints and longs, there is no point wasting the time to determine the - // smallest possible type. return IntegerSize.INT; + } else if (valueTid.length == INT_SIZE_IN_BYTES) { + return (minorVersion != 0 || classifyInteger_1_0()) ? IntegerSize.INT : IntegerSize.LONG; } else if (valueTid.length < LONG_SIZE_IN_BYTES) { return IntegerSize.LONG; } else if (valueTid.length == LONG_SIZE_IN_BYTES) { - // Because creating BigIntegers is so expensive, it is worth it to look ahead and determine exactly - // which 8-byte integers can fit in a long. - return minorVersion == 0 ? classifyEightByteInt_1_0() : IntegerSize.LONG; + return (minorVersion != 0 || classifyInteger_1_0()) ? IntegerSize.LONG : IntegerSize.BIG_INTEGER; } return IntegerSize.BIG_INTEGER; } diff --git a/test/com/amazon/ion/streaming/ReaderIntegerSizeTest.java b/test/com/amazon/ion/streaming/ReaderIntegerSizeTest.java index 73fcb7193b..cc31a06cb3 100644 --- a/test/com/amazon/ion/streaming/ReaderIntegerSizeTest.java +++ b/test/com/amazon/ion/streaming/ReaderIntegerSizeTest.java @@ -142,11 +142,10 @@ public void testGetIntegerSizeNegativeIntBoundary() private void testGetIntegerSizeIntBoundary(int boundaryValue, long pastBoundary) { in.next(); - // It's fine if IntegerSize recommends a larger-than-necessary type, just not a smaller one. - assertTrue(IntegerSize.INT == in.getIntegerSize() || IntegerSize.LONG == in.getIntegerSize()); + assertEquals(IntegerSize.INT, in.getIntegerSize()); assertEquals(boundaryValue, in.intValue()); // assert nothing changes until next() - assertTrue(IntegerSize.INT == in.getIntegerSize() || IntegerSize.LONG == in.getIntegerSize()); + assertEquals(IntegerSize.INT, in.getIntegerSize()); in.next(); assertEquals(IntegerSize.LONG, in.getIntegerSize()); assertEquals(pastBoundary, in.longValue()); @@ -156,10 +155,9 @@ private void testGetIntegerSizeIntBoundary(int boundaryValue, long pastBoundary) private void testGetIntegerSizeLongBoundary(long boundaryValue, BigInteger pastBoundary) { in.next(); - // It's fine if IntegerSize recommends a larger-than-necessary type, just not a smaller one. - assertTrue(IntegerSize.LONG == in.getIntegerSize() || IntegerSize.BIG_INTEGER == in.getIntegerSize()); + assertEquals(IntegerSize.LONG, in.getIntegerSize()); assertEquals(boundaryValue, in.longValue()); - assertTrue(IntegerSize.LONG == in.getIntegerSize() || IntegerSize.BIG_INTEGER == in.getIntegerSize()); + assertEquals(IntegerSize.LONG, in.getIntegerSize()); in.next(); assertEquals(IntegerSize.BIG_INTEGER, in.getIntegerSize()); assertEquals(pastBoundary, in.bigIntegerValue());