From 6bbc17a4c5cdeb11be2312818ec9cbe5e8c1021d Mon Sep 17 00:00:00 2001 From: Matthew Pope <81593196+popematt@users.noreply.github.com> Date: Fri, 1 Dec 2023 12:46:45 -0800 Subject: [PATCH] Initial implementation of Ion 1.1 raw binary writer (#660) --- .../ion/impl/_Private_IonConstants.java | 2 +- .../ion/impl/_Private_RecyclingQueue.java | 25 +- .../java/com/amazon/ion/impl/bin/FlexInt.kt | 175 +++++++++ .../amazon/ion/impl/bin/IonEncoder_1_1.java | 6 +- .../ion/impl/bin/IonRawBinaryWriter.java | 34 -- .../ion/impl/bin/IonRawBinaryWriter_1_1.kt | 357 ++++++++++++++++++ .../ion/impl/bin/Ion_1_1_Constants.java | 2 + .../java/com/amazon/ion/impl/bin/OpCodes.java | 2 + .../com/amazon/ion/impl/bin/PatchPoint.java | 44 +++ .../com/amazon/ion/impl/bin/WriteBuffer.java | 216 +++++------ .../java/com/amazon/ion/util/Assumptions.kt | 47 +++ .../com/amazon/ion/impl/bin/FlexIntTest.kt | 213 +++++++++++ .../impl/bin/IonManagedBinaryWriterTest.java | 14 +- .../impl/bin/IonRawBinaryWriterTest_1_1.kt | 230 +++++++++++ .../amazon/ion/impl/bin/WriteBufferTest.java | 119 +++++- 15 files changed, 1320 insertions(+), 166 deletions(-) create mode 100644 src/main/java/com/amazon/ion/impl/bin/FlexInt.kt create mode 100644 src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt create mode 100644 src/main/java/com/amazon/ion/impl/bin/PatchPoint.java create mode 100644 src/main/java/com/amazon/ion/util/Assumptions.kt create mode 100644 src/test/java/com/amazon/ion/impl/bin/FlexIntTest.kt create mode 100644 src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt diff --git a/src/main/java/com/amazon/ion/impl/_Private_IonConstants.java b/src/main/java/com/amazon/ion/impl/_Private_IonConstants.java index d3db2cb29b..bde35c56a0 100644 --- a/src/main/java/com/amazon/ion/impl/_Private_IonConstants.java +++ b/src/main/java/com/amazon/ion/impl/_Private_IonConstants.java @@ -114,7 +114,7 @@ public static final boolean isSurrogate(int c) { (byte) 0xEA }; /** - * The byte sequence 
indicating use of Ion 1.0 binary format. + * The byte sequence indicating use of Ion 1.1 binary format. */ public static final byte[] BINARY_VERSION_MARKER_1_1 = { (byte) 0xE0, (byte) 0x01, diff --git a/src/main/java/com/amazon/ion/impl/_Private_RecyclingQueue.java b/src/main/java/com/amazon/ion/impl/_Private_RecyclingQueue.java index a52ccd7a35..22afbafd07 100644 --- a/src/main/java/com/amazon/ion/impl/_Private_RecyclingQueue.java +++ b/src/main/java/com/amazon/ion/impl/_Private_RecyclingQueue.java @@ -1,3 +1,5 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; import java.util.ArrayList; @@ -52,7 +54,7 @@ public T next() { /** * @param initialCapacity the initial capacity of the underlying collection. - * @param elementFactory the factory used to create a new element on {@link #push()} when the queue has + * @param elementFactory the factory used to create a new element on {@link #push(Recycler)} when the queue has * not previously grown to the new depth. */ public _Private_RecyclingQueue(int initialCapacity, ElementFactory elementFactory) { @@ -73,7 +75,7 @@ public T get(int index) { /** * Pushes an element onto the top of the queue, instantiating a new element only if the queue has not * previously grown to the new depth. - * @return the element at the top of the queue after the push. This element must be initialized by the caller. + * @return the index of the element at the top of the queue after the push. This element must be initialized by the caller. */ public int push(Recycler recycler) { currentIndex++; @@ -87,6 +89,23 @@ public int push(Recycler recycler) { return currentIndex; } + /** + * Pushes an element onto the top of the queue, instantiating a new element only if the queue has not + * previously grown to the new depth. + * @return the element at the top of the queue after the push. 
+ */ + public T pushAndGet(Recycler recycler) { + currentIndex++; + if (currentIndex >= elements.size()) { + top = elementFactory.newElement(); + elements.add(top); + } else { + top = elements.get(currentIndex); + } + recycler.recycle(top); + return top; + } + /** * Reclaim the current element. */ @@ -119,4 +138,4 @@ public void clear() { public int size() { return currentIndex + 1; } -} \ No newline at end of file +} diff --git a/src/main/java/com/amazon/ion/impl/bin/FlexInt.kt b/src/main/java/com/amazon/ion/impl/bin/FlexInt.kt new file mode 100644 index 0000000000..98ca9e2bda --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/bin/FlexInt.kt @@ -0,0 +1,175 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import java.math.BigInteger + +/** + * Functions for encoding FlexInts and FlexUInts. + * + * Expected usage is calling one of the `___length` functions, and then using the result as the input for + * [writeFlexIntOrUIntInto]. The length and write functions are separate so that callers can make decisions or + * compute other values based on the encoded size of the value. + */ +object FlexInt { + + /** Determine the length of FlexUInt for the provided value. */ + @JvmStatic + fun flexUIntLength(value: Long): Int { + val numLeadingZeros = java.lang.Long.numberOfLeadingZeros(value) + val numMagnitudeBitsRequired = 64 - numLeadingZeros + return (numMagnitudeBitsRequired - 1) / 7 + 1 + } + + /** Determine the length of FlexInt for the provided value. 
*/ + @JvmStatic + fun flexIntLength(value: Long): Int { + val numMagnitudeBitsRequired: Int + numMagnitudeBitsRequired = if (value < 0) { + val numLeadingOnes = java.lang.Long.numberOfLeadingZeros(value.inv()) + 64 - numLeadingOnes + } else { + val numLeadingZeros = java.lang.Long.numberOfLeadingZeros(value) + 64 - numLeadingZeros + } + return numMagnitudeBitsRequired / 7 + 1 + } + + /** + * Writes a FlexInt or FlexUInt encoding of [value] into [data] starting at [offset]. + * Use [flexIntLength] or [flexUIntLength] to get the value for the [numBytes] parameter. + */ + @JvmStatic + fun writeFlexIntOrUIntInto(data: ByteArray, offset: Int, value: Long, numBytes: Int) { + var i = offset + + when (numBytes) { + 1 -> { + data[i] = (0x01L or (value shl 1)).toByte() + } + 2 -> { + data[i] = (0x02L or (value shl 2)).toByte() + data[++i] = (value shr 6).toByte() + } + 3 -> { + data[i] = (0x04L or (value shl 3)).toByte() + data[++i] = (value shr 5).toByte() + data[++i] = (value shr 13).toByte() + } + 4 -> { + data[i] = (0x08L or (value shl 4)).toByte() + data[++i] = (value shr 4).toByte() + data[++i] = (value shr 12).toByte() + data[++i] = (value shr 20).toByte() + } + 5 -> { + data[i] = (0x10L or (value shl 5)).toByte() + data[++i] = (value shr 3).toByte() + data[++i] = (value shr 11).toByte() + data[++i] = (value shr 19).toByte() + data[++i] = (value shr 27).toByte() + } + 6 -> { + data[i] = (0x20L or (value shl 6)).toByte() + data[++i] = (value shr 2).toByte() + data[++i] = (value shr 10).toByte() + data[++i] = (value shr 18).toByte() + data[++i] = (value shr 26).toByte() + data[++i] = (value shr 34).toByte() + } + 7 -> { + data[i] = (0x40L or (value shl 7)).toByte() + data[++i] = (value shr 1).toByte() + data[++i] = (value shr 9).toByte() + data[++i] = (value shr 17).toByte() + data[++i] = (value shr 25).toByte() + data[++i] = (value shr 33).toByte() + data[++i] = (value shr 41).toByte() + } + 8 -> { + data[i] = 0x80.toByte() + data[++i] = (value shr 0).toByte() + 
data[++i] = (value shr 8).toByte() + data[++i] = (value shr 16).toByte() + data[++i] = (value shr 24).toByte() + data[++i] = (value shr 32).toByte() + data[++i] = (value shr 40).toByte() + data[++i] = (value shr 48).toByte() + } + 9 -> { + data[i] = 0 + data[++i] = (0x01L or (value shl 1)).toByte() + data[++i] = (value shr 7).toByte() + data[++i] = (value shr 15).toByte() + data[++i] = (value shr 23).toByte() + data[++i] = (value shr 31).toByte() + data[++i] = (value shr 39).toByte() + data[++i] = (value shr 47).toByte() + data[++i] = (value shr 55).toByte() + } + 10 -> { + data[i] = 0 + data[++i] = (0x02L or (value shl 2)).toByte() + data[++i] = (value shr 6).toByte() + data[++i] = (value shr 14).toByte() + data[++i] = (value shr 22).toByte() + data[++i] = (value shr 30).toByte() + data[++i] = (value shr 38).toByte() + data[++i] = (value shr 46).toByte() + data[++i] = (value shr 54).toByte() + data[++i] = (value shr 62).toByte() + } + } + } + + /** Determine the length of FlexUInt for the provided value. */ + @JvmStatic + fun flexUIntLength(value: BigInteger): Int { + return (value.bitLength() - 1) / 7 + 1 + } + + /** Determine the length of FlexInt for the provided value. */ + @JvmStatic + fun flexIntLength(value: BigInteger): Int { + return value.bitLength() / 7 + 1 + } + + /** + * Writes a FlexInt or FlexUInt encoding of [value] into [data] starting at [offset]. + * Use [flexIntLength] or [flexUIntLength] to get the value for the [numBytes] parameter. + */ + @JvmStatic + fun writeFlexIntOrUIntInto(data: ByteArray, offset: Int, value: BigInteger, numBytes: Int) { + // TODO: Should we branch to the implementation for long if the number is small enough? + // https://github.com/amazon-ion/ion-java/issues/614 + val valueBytes = value.toByteArray() + var i = 0 // `i` gets incremented for every byte written. + + // Start with leading zero bytes. + // If there's 1-8 total bytes, we need no leading zero-bytes. 
+ // If there's 9-16 total bytes, we need one zero-byte + // If there's 17-24 total bytes, we need two zero-bytes, etc. + while (i < (numBytes - 1) / 8) { + data[offset + i] = 0 + i++ + } + + // Write the last length bits, possibly also containing some value bits. + val remainingLengthBits = (numBytes - 1) % 8 + val lengthPart = (0x01 shl remainingLengthBits).toByte() + val valueBitOffset = remainingLengthBits + 1 + val valuePart = (valueBytes[valueBytes.size - 1].toInt() shl valueBitOffset).toByte() + data[offset + i] = (valuePart.toInt() or lengthPart.toInt()).toByte() + i++ + for (valueByteOffset in valueBytes.size - 1 downTo 1) { + // Technically it's only a nibble if the bitOffset is 4, so we call it nibble-ish + val highNibbleIsh = (valueBytes[valueByteOffset - 1].toInt() shl valueBitOffset).toByte() + val lowNibbleIsh = (valueBytes[valueByteOffset].toInt() and 0xFF shr 8 - valueBitOffset).toByte() + data[offset + i] = (highNibbleIsh.toInt() or lowNibbleIsh.toInt()).toByte() + i++ + } + if (i < numBytes) { + data[offset + i] = (valueBytes[0].toInt() shr 8 - valueBitOffset).toByte() + } + } +} diff --git a/src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java b/src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java index 73fc1bbbc4..0e29bf2176 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java +++ b/src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java @@ -167,7 +167,7 @@ public static int writeDecimalValue(WriteBuffer buffer, final BigDecimal value) } int exponent = -value.scale(); - int numExponentBytes = WriteBuffer.flexIntLength(exponent); + int numExponentBytes = FlexInt.flexIntLength(exponent); byte[] coefficientBytes = null; int numCoefficientBytes; @@ -419,7 +419,7 @@ static int writeLongFormTimestampValue(WriteBuffer buffer, Timestamp value) { BigDecimal fractionalSeconds = value.getZFractionalSecond(); long exponent = fractionalSeconds.scale(); - int numExponentBytes = WriteBuffer.flexUIntLength(exponent); + int 
numExponentBytes = FlexInt.flexUIntLength(exponent); BigInteger coefficient = fractionalSeconds.unscaledValue(); byte[] coefficientBytes = null; @@ -556,7 +556,7 @@ public static int writeAnnotations(WriteBuffer buffer, long[] annotations) { } else { int numAddressBytes = 0; for (long ann : annotations) { - numAddressBytes += WriteBuffer.flexUIntLength(ann); + numAddressBytes += FlexInt.flexUIntLength(ann); } buffer.writeByte(OpCodes.ANNOTATIONS_MANY_SYMBOL_ADDRESS); int numLengthBytes = buffer.writeFlexUInt(numAddressBytes); diff --git a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java index 315d4bf204..5d43f6c093 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java +++ b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java @@ -57,7 +57,6 @@ import java.math.BigInteger; import java.util.Iterator; import java.util.ListIterator; -import java.util.NoSuchElementException; /** * Low-level binary {@link IonWriter} that understands encoding concerns but doesn't operate with any sense of symbol table management. @@ -296,39 +295,6 @@ public String toString() } } - private static class PatchPoint - { - /** position of the data being patched out. 
*/ - public long oldPosition; - /** length of the data being patched out.*/ - public int oldLength; - /** size of the container data or annotations.*/ - public long length; - public PatchPoint() - { - oldPosition = -1; - oldLength = -1; - length = -1; - } - - @Override - public String toString() - { - return "(PP old::(" + oldPosition + " " + oldLength + ") patch::(" + length + ")"; - } - - public PatchPoint initialize(final long oldPosition, final int oldLength, final long length) { - this.oldPosition = oldPosition; - this.oldLength = oldLength; - this.length = length; - return this; - } - - public PatchPoint clear() { - return initialize(-1, -1, -1); - } - } - /*package*/ enum StreamCloseMode { NO_CLOSE, diff --git a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt new file mode 100644 index 0000000000..af082ee156 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt @@ -0,0 +1,357 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import com.amazon.ion.* +import com.amazon.ion.impl.* +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.* +import com.amazon.ion.util.* +import java.io.ByteArrayOutputStream +import java.math.BigDecimal +import java.math.BigInteger +import java.time.Instant + +class IonRawBinaryWriter_1_1 internal constructor( + private val out: ByteArrayOutputStream, + private val buffer: WriteBuffer, + private val lengthPrefixPreallocation: Int, +) : IonWriter_1_1 { + + /** + * Types of encoding containers. + */ + enum class ContainerType { + List, + SExp, + Struct, + Macro, + Stream, + /** + * Represents the top level stream. The [containerStack] always has [ContainerInfo] for [Top] at the bottom + * of the stack so that we never have to check if [currentContainer] is null. 
+ * + * TODO: Test if performance is better if we just check currentContainer for nullness. + */ + Top, + } + + private data class ContainerInfo( + var type: ContainerType? = null, + var isDelimited: Boolean = false, + var position: Long = -1, + var length: Long = -1, + // TODO: Test if performance is better with an Object Reference or an index into the PatchPoint queue. + var patchPoint: PatchPoint? = null, + ) + + private var numAnnotations = 0 + private var hasFieldName = false + + private var closed = false + + private val patchPoints = _Private_RecyclingQueue(512) { PatchPoint() } + private val containerStack = _Private_RecyclingStack(8) { ContainerInfo() } + + private var currentContainer: ContainerInfo = containerStack.push { it.type = Top } + + override fun finish() { + if (closed) return + confirm(depth() == 0) { "Cannot call finish() while in a container" } + + if (patchPoints.isEmpty) { + // nothing to patch--write 'em out! + buffer.writeTo(out) + } else { + var bufferPosition: Long = 0 + + // Patch length values are long, so they always fit in 10 bytes or fewer. 
+ val flexUIntScratch = ByteArray(10) + + val iterator = patchPoints.iterate() + + while (iterator.hasNext()) { + val patch = iterator.next() + if (patch.length < 0) { + continue + } + // write up to the thing to be patched + val bufferLength = patch.oldPosition - bufferPosition + buffer.writeTo(out, bufferPosition, bufferLength) + + // write out the patch + // TODO: See if there's a measurable performance benefit if we write directly to the output stream vs using the flexUIntScratch + val numBytes = FlexInt.flexUIntLength(patch.length) + FlexInt.writeFlexIntOrUIntInto(flexUIntScratch, 0, patch.length, numBytes) + out.write(flexUIntScratch, 0, numBytes) + + // skip over the preallocated field + bufferPosition = patch.oldPosition + bufferPosition += patch.oldLength.toLong() + } + buffer.writeTo(out, bufferPosition, buffer.position() - bufferPosition) + } + + buffer.reset() + patchPoints.clear() + + // TODO: Stream flush mode + } + + override fun close() { + if (closed) return + finish() + buffer.close() + closed = true + } + + override fun depth(): Int = containerStack.size() - 1 // "Top" doesn't count when counting depth. + + override fun isInStruct(): Boolean = currentContainer.type == Struct + + override fun writeIVM() { + confirm(currentContainer.type == Top) { "IVM can only be written at the top level of an Ion stream." 
} + confirm(numAnnotations == 0) { "Cannot write an IVM with annotations" } + buffer.writeBytes(_Private_IonConstants.BINARY_VERSION_MARKER_1_1) + } + + override fun writeAnnotations(annotation0: Int) { + TODO("Not yet implemented") + } + + override fun writeAnnotations(annotation0: Int, annotation1: Int) { + TODO("Not yet implemented") + } + + override fun writeAnnotations(annotation0: Int, annotation1: Int, vararg annotations: Int) { + TODO("Not yet implemented") + } + + override fun writeAnnotations(annotation0: CharSequence) { + TODO("Not yet implemented") + } + + override fun writeAnnotations(annotation0: CharSequence, annotation1: CharSequence) { + TODO("Not yet implemented") + } + + override fun writeAnnotations(annotation0: CharSequence, annotation1: CharSequence, vararg annotations: CharSequence) { + TODO("Not yet implemented") + } + + /** + * Helper function for handling annotations and field names when starting a value. + */ + private inline fun openValue(valueWriterExpression: () -> Unit) { + if (numAnnotations > 0) { + TODO("Actually write out the annotations.") + } + numAnnotations = 0 + hasFieldName = false + valueWriterExpression() + } + + /** + * Helper function for finishing scalar values. Similar concerns for containers are handled in [stepOut]. + */ + private inline fun closeScalar(valueWriterExpression: () -> Int) { + val numBytesWritten = valueWriterExpression() + + // Update the container length (unless it's Top) + if (currentContainer.type != Top) currentContainer.length += numBytesWritten + } + + /** + * Helper function for writing scalar values that composes both [openValue] and [closeScalar]. 
+ */ + private inline fun writeScalar(valueWriterExpression: () -> Int) { + openValue { closeScalar(valueWriterExpression) } + } + + override fun writeFieldName(sid: Int) { + TODO("Not implemented yet.") + } + + override fun writeFieldName(text: CharSequence) { + TODO("Not implemented yet.") + } + + override fun writeNull() = writeScalar { IonEncoder_1_1.writeNullValue(buffer, IonType.NULL) } + + override fun writeNull(type: IonType) = writeScalar { IonEncoder_1_1.writeNullValue(buffer, type) } + + override fun writeBool(value: Boolean) = writeScalar { IonEncoder_1_1.writeBoolValue(buffer, value) } + + override fun writeInt(value: Byte) { + TODO("Not yet implemented") + } + + override fun writeInt(value: Int) { + TODO("Not yet implemented") + } + + override fun writeInt(value: Long) { + TODO("Not yet implemented") + } + + override fun writeInt(value: BigInteger) { + TODO("Not yet implemented") + } + + override fun writeFloat(value: Float) { + TODO("Not yet implemented") + } + + override fun writeFloat(value: Double) { + TODO("Not yet implemented") + } + + override fun writeDecimal(value: BigDecimal) { + TODO("Not yet implemented") + } + + override fun writeDecimal(value: Decimal) { + TODO("Not yet implemented") + } + + override fun writeTimestamp(value: Timestamp) { + TODO("Not yet implemented") + } + + override fun writeTimestamp(value: Instant) { + TODO("Not yet implemented") + } + + override fun writeSymbol(id: Int) { + TODO("Not yet implemented") + } + + override fun writeSymbol(text: CharSequence) { + TODO("Not yet implemented") + } + + override fun writeString(value: CharSequence) { + TODO("Not yet implemented") + } + + override fun writeBlob(value: ByteArray, start: Int, length: Int) { + TODO("Not yet implemented") + } + + override fun writeClob(value: ByteArray, start: Int, length: Int) { + TODO("Not yet implemented") + } + + override fun stepInList(delimited: Boolean) { + openValue { + currentContainer = containerStack.push { + it.type = List + it.position 
= buffer.position() + it.isDelimited = delimited + it.length = 0 + } + if (delimited) { + buffer.writeByte(OpCodes.DELIMITED_LIST) + } else { + buffer.writeByte(OpCodes.VARIABLE_LENGTH_LIST) + buffer.reserve(lengthPrefixPreallocation) + } + } + } + + override fun stepInSExp(delimited: Boolean) { + TODO("Not yet implemented") + } + + override fun stepInStruct(delimited: Boolean, useFlexSym: Boolean) { + TODO("Not yet implemented") + } + + override fun stepInEExp(name: CharSequence) { + TODO("Not supported by the raw binary writer.") + } + + override fun stepInEExp(id: Int) { + TODO("Not yet implemented") + } + + override fun stepInStream() { + TODO("Not yet implemented") + } + + override fun stepOut() { + confirm(!hasFieldName) { "Cannot step out with dangling field name." } + confirm(numAnnotations == 0) { "Cannot step out with dangling annotations." } + + // The length of the current container, including any opcodes and length prefixes + var thisContainerTotalLength: Long = 1 + currentContainer.length + + // Write closing delimiter if we're in a delimited container. + // Update length prefix if we're in a prefixed container. + if (currentContainer.isDelimited) { + thisContainerTotalLength += 1 // For the end marker + buffer.writeByte(OpCodes.DELIMITED_END_MARKER) + } else { + // currentContainer.type is non-null for any initialized ContainerInfo + when (currentContainer.type.assumeNotNull()) { + List -> { + // TODO: Possibly extract this so it can be reused for the other length-prefixed container types + val contentLength = currentContainer.length + if (contentLength <= 0xF) { + // Clean up any unused space that was pre-allocated. 
+ buffer.shiftBytesLeft(currentContainer.length.toInt(), lengthPrefixPreallocation) + buffer.writeUInt8At(currentContainer.position, OpCodes.LIST_ZERO_LENGTH + contentLength) + } else { + val lengthPrefixBytesRequired = FlexInt.flexUIntLength(contentLength) + thisContainerTotalLength += lengthPrefixBytesRequired + + if (lengthPrefixBytesRequired == lengthPrefixPreallocation) { + // We have enough space, so write in the correct length. + buffer.writeFlexIntOrUIntAt(currentContainer.position + 1, currentContainer.length, lengthPrefixBytesRequired) + } else { + addPatchPointsToStack() + // currentContainer is in containerStack, so we know that its patchPoint is non-null. + currentContainer.patchPoint.assumeNotNull().apply { + oldPosition = currentContainer.position + 1 + oldLength = lengthPrefixPreallocation + length = currentContainer.length + } + } + } + } + SExp -> TODO() + Struct -> TODO() + Macro -> TODO() + Stream -> TODO() + Top -> throw IonException("Nothing to step out of.") + } + } + + // Set the new current container + containerStack.pop() + currentContainer = containerStack.peek() + // Update the length of the new current container to include the length of the container that we just stepped out of. + if (currentContainer.type != Top) currentContainer.length += thisContainerTotalLength + } + + private fun addPatchPointsToStack() { + // TODO: We may be able to improve this by skipping patch points on ancestors that are delimited containers, + // since the patch points for delimited containers will go unused anyway. However, the additional branching + // may negate the effect of any reduction in allocations. + + // If we're adding a patch point we first need to ensure that all of our ancestors (containing values) already + // have a patch point. No container can be smaller than the contents, so all outer layers also require patches. 
+ // Instead of allocating iterator, we share one iterator instance within the scope of the container stack and reset the cursor every time we track back to the ancestors. + val stackIterator: ListIterator = containerStack.iterator() + // Walk down the stack until we find an ancestor which already has a patch point + while (stackIterator.hasNext() && stackIterator.next().patchPoint == null); + + // The iterator cursor is now positioned on an ancestor container that has a patch point + // Ascend back up the stack, fixing the ancestors which need a patch point assigned before us + while (stackIterator.hasPrevious()) { + val ancestor = stackIterator.previous() + if (ancestor.patchPoint == null) { + ancestor.patchPoint = patchPoints.pushAndGet { it.clear() } + } + } + } +} diff --git a/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java b/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java index 21dce227ad..b1924b7bd2 100644 --- a/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java +++ b/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java @@ -1,3 +1,5 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; import com.amazon.ion.Timestamp; diff --git a/src/main/java/com/amazon/ion/impl/bin/OpCodes.java b/src/main/java/com/amazon/ion/impl/bin/OpCodes.java index 424b65100d..35fe3ad65f 100644 --- a/src/main/java/com/amazon/ion/impl/bin/OpCodes.java +++ b/src/main/java/com/amazon/ion/impl/bin/OpCodes.java @@ -37,6 +37,8 @@ private OpCodes() {} public static final byte INLINE_SYMBOL_ZERO_LENGTH = (byte) 0x90; + public static final byte LIST_ZERO_LENGTH = (byte) 0xA0; + public static final byte SYMBOL_ADDRESS_1_BYTE = (byte) 0xE1; public static final byte SYMBOL_ADDRESS_2_BYTES = (byte) 0xE2; public static final byte SYMBOL_ADDRESS_MANY_BYTES = (byte) 0xE3; diff --git a/src/main/java/com/amazon/ion/impl/bin/PatchPoint.java b/src/main/java/com/amazon/ion/impl/bin/PatchPoint.java new file mode 100644 index 0000000000..8355549da1 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/bin/PatchPoint.java @@ -0,0 +1,44 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin; + +/** + * Represents a slice of bytes that need to be overwritten by a variable length, unsigned integer that is too large + * to fit into the specified slice. + */ +class PatchPoint { + /** + * position of the data being patched out. + */ + public long oldPosition; + /** + * length of the data being patched out. + */ + public int oldLength; + /** + * size of the container data or annotations. 
+ */ + public long length; + + public PatchPoint() { + oldPosition = -1; + oldLength = -1; + length = -1; + } + + @Override + public String toString() { + return "(PP old::(" + oldPosition + " " + oldLength + ") patch::(" + length + ")"; + } + + public PatchPoint initialize(final long oldPosition, final int oldLength, final long length) { + this.oldPosition = oldPosition; + this.oldLength = oldLength; + this.length = length; + return this; + } + + public PatchPoint clear() { + return initialize(-1, -1, -1); + } +} diff --git a/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java b/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java index dca22fd1c4..a467274d2d 100644 --- a/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java +++ b/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java @@ -24,6 +24,10 @@ /** * A facade over {@link Block} management and low-level Ion encoding concerns for the {@link IonRawBinaryWriter}. + * + * The allocator must always have a block size of at least 10 bytes, otherwise writing a FlexInt or FlexUInt may result + * in an IndexOutOfBoundsException. The number 10 is chosen because it is the maximum number of bytes required to write + * a long value as a FlexInt or VarInt. */ /*package*/ final class WriteBuffer implements Closeable { @@ -32,11 +36,16 @@ private Block current; private int index; private Runnable endOfBlockCallBack; - + private byte[] scratch = new byte[32]; public WriteBuffer(final BlockAllocator allocator, Runnable endOfBlockCallBack) { this.allocator = allocator; + + if (allocator.getBlockSize() < 10) { + throw new IllegalArgumentException("WriteBuffer requires an allocator with a block size of at least 10."); + } + this.blocks = new ArrayList(); // initial seed of the first block @@ -96,6 +105,41 @@ public void truncate(final long position) current = block; } + /** + * Moves forward without writing any data. + * + * There is no guarantee as to what values the reserved bytes will have. 
+ * Only use this method if you will overwrite the bytes later with valid data, or if you have already written data + * to these bytes. + * + * Returns the position of the first reserved byte. + */ + public long reserve(int numBytes) { + long startOfReservedBytes = position(); + // It would also fit in the current block if numBytes == current.remaining(), but then we would have to + // increment `index` and check whether to allocate a new block. So, we'll optimize the early return for the most + // common situation, and lump the == case into the slower path. + if (numBytes < current.remaining()) { + current.limit += numBytes; + return startOfReservedBytes; + } + + while (numBytes > 0) { + int numBytesInThisBlock = Math.min(current.remaining(), numBytes); + current.limit += numBytesInThisBlock; + numBytes -= numBytesInThisBlock; + + if (current.remaining() == 0) { + if (index == blocks.size() - 1) { + allocateNewBlock(); + } + index++; + current = blocks.get(index); + } + } + return startOfReservedBytes; + } + /** Returns the amount of capacity left in the current block. */ public int remaining() { @@ -1277,30 +1321,25 @@ public void writeLowerNibbleAt(final long position, final long value) { block.data[offset] = (byte) (bitValue & 0xF0 | value) ; } - /** Get the length of FlexInt for the provided value. 
*/ - public static int flexIntLength(final long value) { - int numMagnitudeBitsRequired; - if (value < 0) { - int numLeadingOnes = Long.numberOfLeadingZeros(~value); - numMagnitudeBitsRequired = 64 - numLeadingOnes; - } else { - int numLeadingZeros = Long.numberOfLeadingZeros(value); - numMagnitudeBitsRequired = 64 - numLeadingZeros; - } - return numMagnitudeBitsRequired / 7 + 1; - } - /** Writes a FlexInt to this WriteBuffer, returning the number of bytes that were needed to encode the value */ public int writeFlexInt(final long value) { - int numBytes = flexIntLength(value); - return writeFlexIntOrUInt(value, numBytes); + int numBytes = FlexInt.flexIntLength(value); + // writeFlexIntOrUIntAt does not advance index or limit, so we reserve the bytes, and then write out the number + long position = reserve(numBytes); + writeFlexIntOrUIntAt(position, value, numBytes); + return numBytes; } - /** Get the length of FlexUInt for the provided value. */ - public static int flexUIntLength(final long value) { - int numLeadingZeros = Long.numberOfLeadingZeros(value); - int numMagnitudeBitsRequired = 64 - numLeadingZeros; - return (numMagnitudeBitsRequired - 1) / 7 + 1; + /** Writes a FlexUInt to this WriteBuffer, returning the number of bytes that were needed to encode the value */ + public int writeFlexUInt(final int value) { + if (value < 0) { + throw new IllegalArgumentException("Attempted to write a FlexUInt for " + value); + } + int numBytes = FlexInt.flexUIntLength(value); + // writeFlexIntOrUIntAt does not advance index or limit, so we reserve the bytes, and then write out the number + long position = reserve(numBytes); + writeFlexIntOrUIntAt(position, value, numBytes); + return numBytes; } /** Writes a FlexUInt to this WriteBuffer, returning the number of bytes that were needed to encode the value */ @@ -1308,117 +1347,70 @@ public int writeFlexUInt(final long value) { if (value < 0) { throw new IllegalArgumentException("Attempted to write a FlexUInt for " + value); } 
- int numBytes = flexUIntLength(value); - return writeFlexIntOrUInt(value, numBytes); + int numBytes = FlexInt.flexUIntLength(value); + // writeFlexIntOrUIntAt does not advance index or limit, so we reserve the bytes, and then write out the number + long position = reserve(numBytes); + writeFlexIntOrUIntAt(position, value, numBytes); + return numBytes; } /** + * Writes a FlexInt or FlexUInt to this WriteBuffer at the specified position. + * * Because the flex int and flex uint encodings are so similar, we can use this method to write either one as long * as we provide the correct number of bytes needed to encode the value. + * + * If the allocator's block size is ever less than 10 bytes, this may throw an IndexOutOfBoundsException. */ - private int writeFlexIntOrUInt(final long value, final int numBytes) { - if (numBytes == 1) { - writeByte((byte) (0x01 | (byte)(value << 1))); - } else if (numBytes == 2) { - writeByte((byte) (0x02 | (byte)(value << 2))); - writeByte((byte) (value >> 6)); - } else if (numBytes == 3) { - writeByte((byte) (0x04 | (byte)(value << 3))); - writeByte((byte) (value >> 5)); - writeByte((byte) (value >> 13)); - } else if (numBytes == 4) { - writeByte((byte) (0x08 | (byte)(value << 4))); - writeByte((byte) (value >> 4)); - writeByte((byte) (value >> 12)); - writeByte((byte) (value >> 20)); + public void writeFlexIntOrUIntAt(final long position, final long value, final int numBytes) { + int index = index(position); + Block block = blocks.get(index); + int dataOffset = offset(position); + if (dataOffset + numBytes < block.capacity()) { + FlexInt.writeFlexIntOrUIntInto(block.data, dataOffset, value, numBytes); } else { - // Finally, fall back to a loop based approach. - - int i = 0; // `i` gets incremented for every byte written. - - // Start with leading zero bytes. - // If there's 1-8 total bytes, we need no leading zero-bytes. 
- // If there's 9-16 total bytes, we need one zero-byte - // If there's 17-24 total bytes, we need two zero-bytes, etc. - for (; i < (numBytes - 1)/8; i++) { - writeByte((byte) 0); + FlexInt.writeFlexIntOrUIntInto(scratch, 0, value, numBytes); + if (index == blocks.size() - 1) { + allocateNewBlock(); } - - // Write the last length bits, possibly also containing some value bits. - int remainingLengthBits = (numBytes - 1) % 8; - byte lengthPart = (byte) (0x01 << remainingLengthBits); - - int valueBitOffset = remainingLengthBits + 1; - byte valuePart = (byte) (value << valueBitOffset); - - writeByte((byte) (valuePart | lengthPart)); - i++; - - int valueByteOffset = 1; - for (; i < numBytes; i++) { - writeByte((byte) (value >> (8 * valueByteOffset - valueBitOffset))); - valueByteOffset++; + for (int i = 0; i < numBytes; i++) { + writeUInt8At(position + i, scratch[i]); } - } - return numBytes; - } - - public static int flexIntLength(final BigInteger value) { - return value.bitLength() / 7 + 1; - } - - public static int flexUIntLength(final BigInteger value) { - return (value.bitLength() - 1) / 7 + 1; } + /** Writes a FlexInt to this WriteBuffer, returning the number of bytes that were needed to encode the value */ public int writeFlexInt(final BigInteger value) { - int numBytes = flexIntLength(value); - return writeFlexIntOrUIntForBigInteger(value, numBytes); + int numBytes = FlexInt.flexIntLength(value); + if (numBytes > current.remaining()) { + if (scratch.length < numBytes) { + scratch = new byte[numBytes]; + } + FlexInt.writeFlexIntOrUIntInto(scratch, 0, value, numBytes); + writeBytesSlow(scratch, 0, numBytes); + } else { + FlexInt.writeFlexIntOrUIntInto(current.data, current.limit, value, numBytes); + current.limit += numBytes; + } + return numBytes; } + /** Writes a FlexUInt to this WriteBuffer, returning the number of bytes that were needed to encode the value */ public int writeFlexUInt(final BigInteger value) { if (value.signum() < 0) { throw new 
IllegalArgumentException("Attempted to write a FlexUInt for " + value); } - int numBytes = flexUIntLength(value); - return writeFlexIntOrUIntForBigInteger(value, numBytes); - } - - private int writeFlexIntOrUIntForBigInteger(final BigInteger value, final int numBytes) { - // TODO: Should we branch to the implementation for long if the number is small enough? - // https://github.com/amazon-ion/ion-java/issues/614 - byte[] valueBytes = value.toByteArray(); - - int i = 0; // `i` gets incremented for every byte written. - - // Start with leading zero bytes. - // If there's 1-8 total bytes, we need no leading zero-bytes. - // If there's 9-16 total bytes, we need one zero-byte - // If there's 17-24 total bytes, we need two zero-bytes, etc. - for (; i < (numBytes - 1)/8; i++) { - writeByte((byte) 0); - } - - // Write the last length bits, possibly also containing some value bits. - int remainingLengthBits = (numBytes - 1) % 8; - byte lengthPart = (byte) (0x01 << remainingLengthBits); - int valueBitOffset = remainingLengthBits + 1; - byte valuePart = (byte) (valueBytes[valueBytes.length - 1] << valueBitOffset); - writeByte((byte) (valuePart | lengthPart)); - i++; - - for (int valueByteOffset = valueBytes.length - 1; valueByteOffset > 0; valueByteOffset--) { - // Technically it's only a nibble if the bitOffset is 4, so we call it nibble-ish - byte highNibbleIsh = (byte) (valueBytes[valueByteOffset - 1] << (valueBitOffset)); - byte lowNibbleIsh = (byte) ((valueBytes[valueByteOffset] & 0xFF) >> (8 - valueBitOffset)); - writeByte((byte) (highNibbleIsh | lowNibbleIsh)); - i++; - } - if (i < numBytes) { - writeByte((byte) ((valueBytes[0]) >> (8 - valueBitOffset))); + int numBytes = FlexInt.flexUIntLength(value); + if (numBytes > current.remaining()) { + if (scratch.length < numBytes) { + scratch = new byte[numBytes]; + } + FlexInt.writeFlexIntOrUIntInto(scratch, 0, value, numBytes); + writeBytesSlow(scratch, 0, numBytes); + } else { + FlexInt.writeFlexIntOrUIntInto(current.data, 
current.limit, value, numBytes); + current.limit += numBytes; } - return numBytes; } diff --git a/src/main/java/com/amazon/ion/util/Assumptions.kt b/src/main/java/com/amazon/ion/util/Assumptions.kt new file mode 100644 index 0000000000..d552cad160 --- /dev/null +++ b/src/main/java/com/amazon/ion/util/Assumptions.kt @@ -0,0 +1,47 @@ +package com.amazon.ion.util + +import com.amazon.ion.IonException +import kotlin.contracts.ExperimentalContracts +import kotlin.contracts.contract + +/** + * Similar to the `!!` operator, this function assumes that the value is not null. Unlike the + * `!!` operator, this function does it without actually checking if the value is null. + * + * Why? This has no branches. If we actually checked if it was null, then there would be branching. + */ +@OptIn(ExperimentalContracts::class) +internal inline fun T?.assumeNotNull(): T { + contract { returns() implies (this@assumeNotNull != null) } + privateAssumeNotNull(this) + return this +} + +/** + * Supporting function for `assumeNotNull`. + * This function exists just to hold the contract to trick the Kotlin compiler into deducing that a value is not null. + */ +@OptIn(ExperimentalContracts::class) +private inline fun privateAssumeNotNull(value: T?) { + contract { returns() implies (value != null) } +} + +/** + * Tell the compiler that some condition is true. Must have a comment indicating why it is safe to trick the compiler. + */ +@OptIn(ExperimentalContracts::class) +internal inline fun assumeUnchecked(assumption: Boolean) { + contract { returns() implies assumption } +} + +/** + * Checks an assumption, throwing an [IonException] with a lazily created message if the assumption is false. + * + * This is named `confirm` because `check` and `require` are already similar functions in the Kotlin Std Lib, and + * `expect`, `verify`, and `assert` are used for test frameworks. 
+ */ +internal inline fun confirm(assumption: Boolean, lazyMessage: () -> String) { + if (!assumption) { + throw IonException(lazyMessage()) + } +} diff --git a/src/test/java/com/amazon/ion/impl/bin/FlexIntTest.kt b/src/test/java/com/amazon/ion/impl/bin/FlexIntTest.kt new file mode 100644 index 0000000000..946d5c6fa0 --- /dev/null +++ b/src/test/java/com/amazon/ion/impl/bin/FlexIntTest.kt @@ -0,0 +1,213 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import com.amazon.ion.TestUtils.* +import java.math.BigInteger +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.CsvSource + +/** + * This does not test individual methods because the individual methods of [FlexInt] are not meant to be + * called in isolation. + */ +class FlexIntTest { + + @ParameterizedTest + @CsvSource( + " 0, 00000001", + " 1, 00000011", + " 2, 00000101", + " 3, 00000111", + " 4, 00001001", + " 5, 00001011", + " 14, 00011101", + " 63, 01111111", + " 64, 00000010 00000001", + " 729, 01100110 00001011", + " 8191, 11111110 01111111", + " 8192, 00000100 00000000 00000001", + " 1048575, 11111100 11111111 01111111", + " 1048576, 00001000 00000000 00000000 00000001", + " 134217727, 11111000 11111111 11111111 01111111", + " 134217728, 00010000 00000000 00000000 00000000 00000001", + " 17179869184, 00100000 00000000 00000000 00000000 00000000 00000001", + " 2199023255552, 01000000 00000000 00000000 00000000 00000000 00000000 00000001", + " 281474976710655, 11000000 11111111 11111111 11111111 11111111 11111111 01111111", + " 281474976710656, 10000000 00000000 00000000 00000000 00000000 00000000 00000000 00000001", + " 36028797018963967, 10000000 11111111 11111111 11111111 11111111 11111111 11111111 01111111", + " 36028797018963968, 00000000 00000001 00000000 00000000 00000000 00000000 00000000 00000000 00000001", + " 
72624976668147840, 00000000 00000001 10000001 01000000 00100000 00010000 00001000 00000100 00000010", + " 4611686018427387903, 00000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111 01111111", + " 4611686018427387904, 00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000001", + " ${Long.MAX_VALUE}, 00000000 11111110 11111111 11111111 11111111 11111111 11111111 11111111 11111111 00000001", + " -1, 11111111", + " -2, 11111101", + " -3, 11111011", + " -14, 11100101", + " -64, 10000001", + " -65, 11111110 11111110", + " -729, 10011110 11110100", + " -8192, 00000010 10000000", + " -8193, 11111100 11111111 11111110", + " -1048576, 00000100 00000000 10000000", + " -1048577, 11111000 11111111 11111111 11111110", + " -134217728, 00001000 00000000 00000000 10000000", + " -134217729, 11110000 11111111 11111111 11111111 11111110", + " -17179869184, 00010000 00000000 00000000 00000000 10000000", + " -17179869185, 11100000 11111111 11111111 11111111 11111111 11111110", + " -281474976710656, 01000000 00000000 00000000 00000000 00000000 00000000 10000000", + " -281474976710657, 10000000 11111111 11111111 11111111 11111111 11111111 11111111 11111110", + " -36028797018963968, 10000000 00000000 00000000 00000000 00000000 00000000 00000000 10000000", + " -36028797018963969, 00000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111110", + " -72624976668147841, 00000000 11111111 01111110 10111111 11011111 11101111 11110111 11111011 11111101", + "-4611686018427387904, 00000000 00000001 00000000 00000000 00000000 00000000 00000000 00000000 10000000", + "-4611686018427387905, 00000000 11111110 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111110", + " ${Long.MIN_VALUE}, 00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 11111110" + ) + fun testWriteFlexInt(value: Long, expectedBits: String) { + val numBytes: Int = FlexInt.flexIntLength(value) + val bytes = 
ByteArray(numBytes) + FlexInt.writeFlexIntOrUIntInto(bytes, 0, value, numBytes) + Assertions.assertEquals(expectedBits, byteArrayToBitString(bytes)) + Assertions.assertEquals((expectedBits.length + 1) / 9, numBytes) + } + + @ParameterizedTest + @CsvSource( + " 0, 00000001", + " 1, 00000011", + " 2, 00000101", + " 3, 00000111", + " 4, 00001001", + " 5, 00001011", + " 14, 00011101", + " 63, 01111111", + " 64, 10000001", + " 127, 11111111", + " 128, 00000010 00000010", + " 729, 01100110 00001011", + " 16383, 11111110 11111111", + " 16384, 00000100 00000000 00000010", + " 2097151, 11111100 11111111 11111111", + " 2097152, 00001000 00000000 00000000 00000010", + " 268435455, 11111000 11111111 11111111 11111111", + " 268435456, 00010000 00000000 00000000 00000000 00000010", + " 34359738368, 00100000 00000000 00000000 00000000 00000000 00000010", + " 4398046511104, 01000000 00000000 00000000 00000000 00000000 00000000 00000010", + " 562949953421311, 11000000 11111111 11111111 11111111 11111111 11111111 11111111", + " 562949953421312, 10000000 00000000 00000000 00000000 00000000 00000000 00000000 00000010", + " 72057594037927935, 10000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111", + " 72057594037927936, 00000000 00000001 00000000 00000000 00000000 00000000 00000000 00000000 00000010", + " 72624976668147840, 00000000 00000001 10000001 01000000 00100000 00010000 00001000 00000100 00000010", + " ${Long.MAX_VALUE}, 00000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111" + ) + fun testWriteFlexUInt(value: Long, expectedBits: String) { + val numBytes: Int = FlexInt.flexUIntLength(value) + val bytes = ByteArray(numBytes) + FlexInt.writeFlexIntOrUIntInto(bytes, 0, value, numBytes) + Assertions.assertEquals(expectedBits, byteArrayToBitString(bytes)) + Assertions.assertEquals((expectedBits.length + 1) / 9, numBytes) + } + + @ParameterizedTest + @CsvSource( + " 0, 00000001", + " 1, 00000011", + " 2, 00000101", + " 3, 00000111", + " 4, 
00001001", + " 5, 00001011", + " 14, 00011101", + " 63, 01111111", + " 64, 00000010 00000001", + " 729, 01100110 00001011", + " 8191, 11111110 01111111", + " 8192, 00000100 00000000 00000001", + " 1048575, 11111100 11111111 01111111", + " 1048576, 00001000 00000000 00000000 00000001", + " 134217727, 11111000 11111111 11111111 01111111", + " 134217728, 00010000 00000000 00000000 00000000 00000001", + " 17179869184, 00100000 00000000 00000000 00000000 00000000 00000001", + " 2199023255552, 01000000 00000000 00000000 00000000 00000000 00000000 00000001", + " 281474976710655, 11000000 11111111 11111111 11111111 11111111 11111111 01111111", + " 281474976710656, 10000000 00000000 00000000 00000000 00000000 00000000 00000000 00000001", + " 36028797018963967, 10000000 11111111 11111111 11111111 11111111 11111111 11111111 01111111", + " 36028797018963968, 00000000 00000001 00000000 00000000 00000000 00000000 00000000 00000000 00000001", + " 72624976668147840, 00000000 00000001 10000001 01000000 00100000 00010000 00001000 00000100 00000010", + " 4611686018427387903, 00000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111 01111111", + " 4611686018427387904, 00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000001", + " ${Long.MAX_VALUE}, 00000000 11111110 11111111 11111111 11111111 11111111 11111111 11111111 11111111 00000001", + " 9223372036854775808, 00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000010", + " -1, 11111111", + " -2, 11111101", + " -3, 11111011", + " -14, 11100101", + " -64, 10000001", + " -65, 11111110 11111110", + " -729, 10011110 11110100", + " -8192, 00000010 10000000", + " -8193, 11111100 11111111 11111110", + " -1048576, 00000100 00000000 10000000", + " -1048577, 11111000 11111111 11111111 11111110", + " -134217728, 00001000 00000000 00000000 10000000", + " -134217729, 11110000 11111111 11111111 11111111 11111110", + " -17179869184, 00010000 00000000 00000000 00000000 
10000000", + " -17179869185, 11100000 11111111 11111111 11111111 11111111 11111110", + " -281474976710656, 01000000 00000000 00000000 00000000 00000000 00000000 10000000", + " -281474976710657, 10000000 11111111 11111111 11111111 11111111 11111111 11111111 11111110", + " -36028797018963968, 10000000 00000000 00000000 00000000 00000000 00000000 00000000 10000000", + " -36028797018963969, 00000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111110", + " -72624976668147841, 00000000 11111111 01111110 10111111 11011111 11101111 11110111 11111011 11111101", + "-4611686018427387904, 00000000 00000001 00000000 00000000 00000000 00000000 00000000 00000000 10000000", + "-4611686018427387905, 00000000 11111110 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111110", + " ${Long.MIN_VALUE}, 00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 11111110", + "-9223372036854775809, 00000000 11111110 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111101" + ) + fun testWriteFlexIntForBigInteger(value: BigInteger, expectedBits: String) { + val numBytes: Int = FlexInt.flexIntLength(value) + val bytes = ByteArray(numBytes) + FlexInt.writeFlexIntOrUIntInto(bytes, 0, value, numBytes) + Assertions.assertEquals(expectedBits, byteArrayToBitString(bytes)) + Assertions.assertEquals((expectedBits.length + 1) / 9, numBytes) + } + + @ParameterizedTest + @CsvSource( + " 0, 00000001", + " 1, 00000011", + " 2, 00000101", + " 3, 00000111", + " 4, 00001001", + " 5, 00001011", + " 14, 00011101", + " 63, 01111111", + " 64, 10000001", + " 127, 11111111", + " 128, 00000010 00000010", + " 729, 01100110 00001011", + " 16383, 11111110 11111111", + " 16384, 00000100 00000000 00000010", + " 2097151, 11111100 11111111 11111111", + " 2097152, 00001000 00000000 00000000 00000010", + " 268435455, 11111000 11111111 11111111 11111111", + " 268435456, 00010000 00000000 00000000 00000000 00000010", + " 34359738368, 00100000 00000000 
00000000 00000000 00000000 00000010", + " 4398046511104, 01000000 00000000 00000000 00000000 00000000 00000000 00000010", + " 562949953421311, 11000000 11111111 11111111 11111111 11111111 11111111 11111111", + " 562949953421312, 10000000 00000000 00000000 00000000 00000000 00000000 00000000 00000010", + " 72057594037927935, 10000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111", + " 72057594037927936, 00000000 00000001 00000000 00000000 00000000 00000000 00000000 00000000 00000010", + " 72624976668147840, 00000000 00000001 10000001 01000000 00100000 00010000 00001000 00000100 00000010", + " ${Long.MAX_VALUE}, 00000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111", + "9223372036854775808, 00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000010" + ) + fun testWriteFlexUIntForBigInteger(value: BigInteger, expectedBits: String) { + val numBytes: Int = FlexInt.flexUIntLength(value) + val bytes = ByteArray(numBytes) + FlexInt.writeFlexIntOrUIntInto(bytes, 0, value, numBytes) + Assertions.assertEquals(expectedBits, byteArrayToBitString(bytes)) + Assertions.assertEquals((expectedBits.length + 1) / 9, numBytes) + } +} diff --git a/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java b/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java index bfa1e196d2..2a13a6a740 100644 --- a/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java +++ b/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java @@ -282,10 +282,14 @@ public void testAutoFlush_67K() throws Exception{ @Test public void testAutoFlush_twiceBlockSize() throws IOException { - IonReader reader = system().newReader(singleTopLevelValue_13B.toByteArray()); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonWriter writer = IonBinaryWriterBuilder.standard().build(out); + writer.writeString("abcdefghijklmnopqrstuvwxyz"); // Write a 27-byte IonString. 
+ writer.close(); + IonReader reader = system().newReader(out.toByteArray()); ByteArrayOutputStream actual = new ByteArrayOutputStream(); - // Set the actual writer block size as 5 bytes. The test data is a 13-byte IonString "taco_burrito". - IonBinaryWriterBuilder builder = IonBinaryWriterBuilder.standard().withAutoFlushEnabled(autoFlushMode.isEnabled()).withBlockSize(5); + // Set the actual writer block size as 10 bytes. The test data is a 27-byte IonString "abcdefghijklmnopqrstuvwxyz". + IonBinaryWriterBuilder builder = IonBinaryWriterBuilder.standard().withAutoFlushEnabled(autoFlushMode.isEnabled()).withBlockSize(10); IonWriter actualWriter = builder.build(actual); while (reader.next() != null) { actualWriter.writeValue(reader); @@ -293,9 +297,9 @@ public void testAutoFlush_twiceBlockSize() throws IOException { actualWriter.close(); if (lstAppendMode.isEnabled() && autoFlushMode.isEnabled()) { // When auto-flush is enabled, no flush is expected since this is a single top-level value and should continue encoding until this value is completed. - assertArrayEquals(actual.toByteArray(), singleTopLevelValue_13B.toByteArray()); + assertArrayEquals(actual.toByteArray(), out.toByteArray()); } - assertEquivalentDataModel(actual, singleTopLevelValue_13B); + assertEquivalentDataModel(actual, out); } @Test diff --git a/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt b/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt new file mode 100644 index 0000000000..50ca29e8ee --- /dev/null +++ b/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt @@ -0,0 +1,230 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import com.amazon.ion.IonException +import com.amazon.ion.IonType +import java.io.ByteArrayOutputStream +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.CsvSource + +class IonRawBinaryWriterTest_1_1 { + + private inline fun writeAsHexString(block: IonRawBinaryWriter_1_1.() -> Unit): String { + val baos = ByteArrayOutputStream() + val rawWriter = IonRawBinaryWriter_1_1( + out = baos, + buffer = WriteBuffer(BlockAllocatorProviders.basicProvider().vendAllocator(32)) {}, + lengthPrefixPreallocation = 1, + ) + block.invoke(rawWriter) + @OptIn(ExperimentalStdlibApi::class) + return baos.toByteArray().joinToString(" ") { it.toHexString(HexFormat.UpperCase) } + } + + private inline fun assertWriterOutputEquals(hexBytes: String, block: IonRawBinaryWriter_1_1.() -> Unit) { + assertEquals(hexBytes, writeAsHexString(block)) + } + + @Test + fun `calling close while in a container should throw IonException`() { + assertThrows { + writeAsHexString { + stepInList(false) + close() + } + } + } + + @Test + fun `calling finish while in a container should throw IonException`() { + assertThrows { + writeAsHexString { + stepInList(true) + finish() + } + } + } + + @Test + fun `calling stepOut while not in a container should throw IonException`() { + assertThrows { + writeAsHexString { + stepOut() + } + } + } + + @Test + fun `calling writeIVM when in a container should throw IonException`() { + assertThrows { + writeAsHexString { + stepInList(false) + writeIVM() + } + } + } + + @Test + fun `calling finish should cause the buffered data to be written to the output stream`() { + val actual = writeAsHexString { + writeIVM() + finish() + } + // Just checking that data is written, not asserting 
the content. + assertTrue(actual.isNotBlank()) + } + + @Test + fun `after calling finish, it should still be possible to write more data`() { + val actual = writeAsHexString { + finish() + writeIVM() + close() + } + // Just checking that data is written, not asserting the content. + assertTrue(actual.isNotBlank()) + } + + @Test + fun `calling close should cause the buffered data to be written to the output stream`() { + val actual = writeAsHexString { + writeIVM() + close() + } + // Just checking that data is written, not asserting the content. + assertTrue(actual.isNotBlank()) + } + + @Test + fun `calling close or finish multiple times should not throw any exceptions`() { + val actual = writeAsHexString { + writeIVM() + finish() + close() + finish() + close() + finish() + } + // Just checking that data is written, not asserting the content. + assertTrue(actual.isNotBlank()) + } + + @Test + fun `write the IVM`() { + assertWriterOutputEquals("E0 01 01 EA") { + writeIVM() + close() + } + } + + @Test + fun `write nothing`() { + assertWriterOutputEquals("") { + close() + } + } + + @Test + fun `write a null`() { + assertWriterOutputEquals("EA") { + writeNull() + close() + } + } + + @Test + fun `write a null with a specific type`() { + // Just checking one type. 
The full range of types are checked in IonEncoder_1_1Test + assertWriterOutputEquals("EB 00") { + writeNull(IonType.BOOL) + close() + } + } + + @ParameterizedTest + @CsvSource("true, 5E", "false, 5F") + fun `write a boolean`(value: Boolean, hexBytes: String) { + assertWriterOutputEquals(hexBytes) { + writeBool(value) + close() + } + } + + @Test + fun `write a delimited list`() { + assertWriterOutputEquals("F1 5E 5F F0") { + stepInList(true) + writeBool(true) + writeBool(false) + stepOut() + close() + } + } + + @Test + fun `write a prefixed list`() { + assertWriterOutputEquals("A2 5E 5F") { + stepInList(false) + writeBool(true) + writeBool(false) + stepOut() + close() + } + } + + @Test + fun `write a variable-length prefixed list`() { + assertWriterOutputEquals("FA 21${" 5E".repeat(16)}") { + stepInList(false) + repeat(16) { writeBool(true) } + stepOut() + finish() + close() + } + } + + @Test + fun `write a prefixed list that is so long it requires patch points`() { + assertWriterOutputEquals("FA 02 02${" 5E".repeat(128)}") { + stepInList(false) + repeat(128) { writeBool(true) } + stepOut() + close() + } + } + + @Test + fun `write multiple nested prefixed lists`() { + assertWriterOutputEquals("A4 A3 A2 A1 A0") { + repeat(5) { stepInList(false) } + repeat(5) { stepOut() } + close() + } + } + + @Test + fun `write multiple nested delimited lists`() { + assertWriterOutputEquals("F1 F1 F1 F1 F0 F0 F0 F0") { + repeat(4) { stepInList(true) } + repeat(4) { stepOut() } + close() + } + } + + @Test + fun `write multiple nested delimited and prefixed lists`() { + assertWriterOutputEquals("F1 A9 F1 A6 F1 A3 F1 A0 F0 F0 F0 F0") { + repeat(4) { + stepInList(true) + stepInList(false) + } + repeat(8) { stepOut() } + close() + } + } +} diff --git a/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java b/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java index e71c4fb7c4..2f140c0293 100644 --- a/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java +++ 
b/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java @@ -18,11 +18,7 @@ import static com.amazon.ion.TestUtils.hexDump; import static com.amazon.ion.impl.bin.WriteBuffer.varUIntLength; import static com.amazon.ion.impl.bin.WriteBuffer.writeVarUIntTo; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -79,11 +75,17 @@ private void assertBuffer(final byte[] expected) final byte[] actual = bytes(); assertArrayEquals( - "Bytes don't match!\nEXPECTED:\n" + hexDump(expected) + "\nACTUAL:\n" + hexDump(actual) + "\n", - expected, actual + expected, actual, + "Bytes don't match!\nEXPECTED:\n" + hexDump(expected) + "\nACTUAL:\n" + hexDump(actual) + "\n" ); } + @Test + public void testConstructorThrowsWhenBlockSizeTooSmall() { + BlockAllocator ba = BlockAllocatorProviders.basicProvider().vendAllocator(9); + assertThrows(IllegalArgumentException.class, () -> new WriteBuffer(ba, () -> {})); + } + @Test public void testInt8Positive() { @@ -1121,6 +1123,32 @@ public void shiftBytesLeftWithLengthZeroAcrossBlocks() { assertBuffer("0123456789".getBytes()); } + @Test + public void reserveShouldSkipTheRequestedNumberOfBytes() { + buf.reserve(5); + buf.writeBytes("A".getBytes()); + // WARNING: In testing, the reserved bytes do happen to be 0, but you cannot assume that is true in the general case. + assertBuffer("\0\0\0\0\0A".getBytes()); + } + + @Test + public void reserveShouldSkipTheRequestedNumberOfBytesAcrossOneBlock() { + assertEquals(11, ALLOCATOR.getBlockSize()); + buf.reserve(15); + buf.writeBytes("A".getBytes()); + // WARNING: In testing, the reserved bytes do happen to be 0, but you cannot assume that is true in the general case. 
+ assertBuffer("\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0A".getBytes()); + } + + @Test + public void reserveShouldSkipTheRequestedNumberOfBytesAcrossManyBlock() { + assertEquals(11, ALLOCATOR.getBlockSize()); + buf.reserve(40); + buf.writeBytes("A".getBytes()); + // WARNING: In testing, the reserved bytes do happen to be 0, but you cannot assume that is true in the general case. + assertBuffer("\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0A".getBytes()); + } + /** * Test if the method 'writeVarUIntTo' writes the expected bytes to the output stream. * @throws Exception if there is an error occurred while writing data to the output stream. @@ -1386,6 +1414,25 @@ public void testWriteFlexInt(long value, String expectedBits) { Assertions.assertEquals((expectedBits.length() + 1)/9, numBytes); } + @Test + public void testWriteFlexIntAcrossBlocks() { + long value = Long.MIN_VALUE; + String expectedNumberBits = "00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 11111110"; + + for (int i = 0; i < ALLOCATOR.getBlockSize(); i++) { + buf.reset(); + StringBuilder expectedBits = new StringBuilder(); + for (int j = 0; j < i; j++) { + buf.writeByte((byte) 0x55); + expectedBits.append("01010101 "); + } + expectedBits.append(expectedNumberBits); + buf.writeFlexInt(value); + String actualBits = byteArrayToBitString(bytes()); + Assertions.assertEquals(expectedBits.toString(), actualBits); + } + } + @ParameterizedTest @CsvSource({ " 0, 00000001", @@ -1424,6 +1471,25 @@ public void testWriteFlexUInt(long value, String expectedBits) { Assertions.assertEquals((expectedBits.length() + 1)/9, numBytes); } + @Test + public void testWriteFlexUIntAcrossBlocks() { + long value = Long.MAX_VALUE; + String expectedNumberBits = "00000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111"; + + for (int i = 0; i < ALLOCATOR.getBlockSize(); i++) { + buf.reset(); + StringBuilder expectedBits = new StringBuilder(); + for (int j 
= 0; j < i; j++) { + buf.writeByte((byte) 0x55); + expectedBits.append("01010101 "); + } + expectedBits.append(expectedNumberBits); + buf.writeFlexUInt(value); + String actualBits = byteArrayToBitString(bytes()); + Assertions.assertEquals(expectedBits.toString(), actualBits); + } + } + @Test public void testWriteFlexUIntForNegativeNumber() { Assertions.assertThrows(IllegalArgumentException.class, () -> buf.writeFlexUInt(-1)); @@ -1486,7 +1552,6 @@ public void testWriteFlexUIntForNegativeNumber() { // Long.MIN_VALUE "-9223372036854775808, 00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 11111110", "-9223372036854775809, 00000000 11111110 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111101", - }) public void testWriteFlexIntForBigInteger(String value, String expectedBits) { int numBytes = buf.writeFlexInt(new BigInteger(value)); @@ -1495,6 +1560,25 @@ public void testWriteFlexIntForBigInteger(String value, String expectedBits) { Assertions.assertEquals((expectedBits.length() + 1)/9, numBytes); } + @Test + public void testWriteFlexIntForBigIntegerAcrossBlocks() { + BigInteger value = new BigInteger("-9223372036854775809"); + String expectedNumberBits = "00000000 11111110 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111101"; + + for (int i = 0; i < ALLOCATOR.getBlockSize(); i++) { + buf.reset(); + StringBuilder expectedBits = new StringBuilder(); + for (int j = 0; j < i; j++) { + buf.writeByte((byte) 0x55); + expectedBits.append("01010101 "); + } + expectedBits.append(expectedNumberBits); + buf.writeFlexInt(value); + String actualBits = byteArrayToBitString(bytes()); + Assertions.assertEquals(expectedBits.toString(), actualBits); + } + } + @ParameterizedTest @CsvSource({ " 0, 00000001", @@ -1534,6 +1618,25 @@ public void testWriteFlexUIntForBigInteger(String value, String expectedBits) { Assertions.assertEquals((expectedBits.length() + 1)/9, numBytes); } + @Test + public void 
testWriteFlexUIntForBigIntegerAcrossBlocks() { + BigInteger value = new BigInteger("9223372036854775808"); + String expectedNumberBits = "00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000010"; + + for (int i = 0; i < ALLOCATOR.getBlockSize(); i++) { + buf.reset(); + StringBuilder expectedBits = new StringBuilder(); + for (int j = 0; j < i; j++) { + buf.writeByte((byte) 0x55); + expectedBits.append("01010101 "); + } + expectedBits.append(expectedNumberBits); + buf.writeFlexUInt(value); + String actualBits = byteArrayToBitString(bytes()); + Assertions.assertEquals(expectedBits.toString(), actualBits); + } + } + @Test public void testWriteFlexUIntForNegativeBigInteger() { Assertions.assertThrows(IllegalArgumentException.class, () -> buf.writeFlexUInt(BigInteger.ONE.negate()));