Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial implementation of Ion 1.1 raw binary writer #660

Merged
merged 3 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ public static final boolean isSurrogate(int c) {
(byte) 0xEA };

/**
* The byte sequence indicating use of Ion 1.0 binary format.
* The byte sequence indicating use of Ion 1.1 binary format.
*/
public static final byte[] BINARY_VERSION_MARKER_1_1 = { (byte) 0xE0,
(byte) 0x01,
Expand Down
25 changes: 22 additions & 3 deletions src/main/java/com/amazon/ion/impl/_Private_RecyclingQueue.java
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl;

import java.util.ArrayList;
Expand Down Expand Up @@ -52,7 +54,7 @@ public T next() {

/**
* @param initialCapacity the initial capacity of the underlying collection.
* @param elementFactory the factory used to create a new element on {@link #push()} when the queue has
* @param elementFactory the factory used to create a new element on {@link #push(Recycler)} when the queue has
* not previously grown to the new depth.
*/
public _Private_RecyclingQueue(int initialCapacity, ElementFactory<T> elementFactory) {
Expand All @@ -73,7 +75,7 @@ public T get(int index) {
/**
* Pushes an element onto the top of the queue, instantiating a new element only if the queue has not
* previously grown to the new depth.
* @return the element at the top of the queue after the push. This element must be initialized by the caller.
* @return the index of the element at the top of the queue after the push. This element must be initialized by the caller.
*/
public int push(Recycler<T> recycler) {
currentIndex++;
Expand All @@ -87,6 +89,23 @@ public int push(Recycler<T> recycler) {
return currentIndex;
}

/**
 * Advances the queue by one slot and hands back the element occupying that slot. A new element is requested from
 * the element factory only when the queue has never been this deep before; otherwise the element already stored
 * at that depth is reused.
 * @param recycler invoked on the element before it is returned.
 * @return the element at the top of the queue after the push.
 */
public T pushAndGet(Recycler<T> recycler) {
    final int depth = ++currentIndex;
    if (depth < elements.size()) {
        // This depth has been reached before: reuse the stored element.
        top = elements.get(depth);
    } else {
        top = elementFactory.newElement();
        elements.add(top);
    }
    recycler.recycle(top);
    return top;
}

/**
* Reclaim the current element.
*/
Expand Down Expand Up @@ -119,4 +138,4 @@ public void clear() {
public int size() {
// currentIndex is used as an index into the element list elsewhere in this class, so the count is one greater.
return currentIndex + 1;
}
}
}
175 changes: 175 additions & 0 deletions src/main/java/com/amazon/ion/impl/bin/FlexInt.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl.bin

import java.math.BigInteger

/**
* Functions for encoding FlexInts and FlexUInts.
*
* Expected usage is calling one of the `___length` functions, and then using the result as the input for
* [writeFlexIntOrUIntInto]. The length and write functions are separate so that callers can make decisions or
* compute other values based on the encoded size of the value.
*/
object FlexInt {

/**
 * Computes how many bytes a FlexUInt encoding of [value] occupies.
 *
 * Every encoded byte carries seven value bits, so the result is the count of significant bits in [value]
 * divided by seven and rounded up, with a minimum of one byte (zero still takes one byte). A negative [value]
 * has its top bit set, counts as 64 significant bits, and therefore yields 10.
 */
@JvmStatic
fun flexUIntLength(value: Long): Int {
    val significantBits = java.lang.Long.SIZE - java.lang.Long.numberOfLeadingZeros(value)
    return 1 + (significantBits - 1) / 7
}

/**
 * Computes how many bytes a FlexInt encoding of [value] occupies.
 *
 * The magnitude bits are the bits below the run of leading sign bits. The result is that bit count divided by
 * seven (rounded down) plus one.
 */
@JvmStatic
fun flexIntLength(value: Long): Int {
    // Inverting a negative value turns its leading ones into leading zeros, so both signs share one computation.
    val magnitude = if (value < 0) value.inv() else value
    val magnitudeBits = 64 - java.lang.Long.numberOfLeadingZeros(magnitude)
    return magnitudeBits / 7 + 1
}

/**
 * Writes a FlexInt or FlexUInt encoding of [value] into [data] starting at [offset].
 * Use [flexIntLength] or [flexUIntLength] to get the value for the [numBytes] parameter.
 *
 * Each supported size (1 to 10 bytes) has its own unrolled branch. The first byte written carries a single
 * set marker bit in its low-order bits, followed by the lowest bits of [value]; the remaining bytes hold
 * successively higher bits of [value], least-significant byte first. For 9 and 10 bytes the first byte is
 * zero and the marker bit is placed in the second byte.
 *
 * There is no `else` branch, so a [numBytes] outside 1..10 writes nothing.
 *
 * NOTE(review): the higher bytes are produced with `shr`, which propagates the sign bit. For a 10-byte
 * FlexUInt whose [value] has its top bit set, the upper bits of the final byte are therefore filled with
 * ones -- confirm whether unsigned values above `Long.MAX_VALUE` are expected to reach this function.
 *
 * @param data the array to write into.
 * @param offset index in [data] of the first byte written.
 * @param value the number to encode.
 * @param numBytes the encoded length in bytes, which selects the branch to run.
 */
@JvmStatic
fun writeFlexIntOrUIntInto(data: ByteArray, offset: Int, value: Long, numBytes: Int) {
// `i` is pre-incremented before every write after the first one.
var i = offset

when (numBytes) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I discovered that using when to compare directly against some constants would allow the Kotlin compiler to generate a jump table, so I figured I'd just manually unroll the old loop based logic. Now the only branching in this method is the jump table for the when, compared to the chained if/else falling back to the loop in the previous implementation.

1 -> {
data[i] = (0x01L or (value shl 1)).toByte()
}
2 -> {
data[i] = (0x02L or (value shl 2)).toByte()
data[++i] = (value shr 6).toByte()
}
3 -> {
data[i] = (0x04L or (value shl 3)).toByte()
data[++i] = (value shr 5).toByte()
data[++i] = (value shr 13).toByte()
}
4 -> {
data[i] = (0x08L or (value shl 4)).toByte()
data[++i] = (value shr 4).toByte()
data[++i] = (value shr 12).toByte()
data[++i] = (value shr 20).toByte()
}
5 -> {
data[i] = (0x10L or (value shl 5)).toByte()
data[++i] = (value shr 3).toByte()
data[++i] = (value shr 11).toByte()
data[++i] = (value shr 19).toByte()
data[++i] = (value shr 27).toByte()
}
6 -> {
data[i] = (0x20L or (value shl 6)).toByte()
data[++i] = (value shr 2).toByte()
data[++i] = (value shr 10).toByte()
data[++i] = (value shr 18).toByte()
data[++i] = (value shr 26).toByte()
data[++i] = (value shr 34).toByte()
}
7 -> {
data[i] = (0x40L or (value shl 7)).toByte()
data[++i] = (value shr 1).toByte()
data[++i] = (value shr 9).toByte()
data[++i] = (value shr 17).toByte()
data[++i] = (value shr 25).toByte()
data[++i] = (value shr 33).toByte()
data[++i] = (value shr 41).toByte()
}
// The marker bit is the top bit of the first byte, so that byte carries no value bits.
8 -> {
data[i] = 0x80.toByte()
data[++i] = (value shr 0).toByte()
data[++i] = (value shr 8).toByte()
data[++i] = (value shr 16).toByte()
data[++i] = (value shr 24).toByte()
data[++i] = (value shr 32).toByte()
data[++i] = (value shr 40).toByte()
data[++i] = (value shr 48).toByte()
}
// From here on the first byte is all zeros and the marker bit moves into the second byte.
9 -> {
data[i] = 0
data[++i] = (0x01L or (value shl 1)).toByte()
data[++i] = (value shr 7).toByte()
data[++i] = (value shr 15).toByte()
data[++i] = (value shr 23).toByte()
data[++i] = (value shr 31).toByte()
data[++i] = (value shr 39).toByte()
data[++i] = (value shr 47).toByte()
data[++i] = (value shr 55).toByte()
}
10 -> {
data[i] = 0
data[++i] = (0x02L or (value shl 2)).toByte()
data[++i] = (value shr 6).toByte()
data[++i] = (value shr 14).toByte()
data[++i] = (value shr 22).toByte()
data[++i] = (value shr 30).toByte()
data[++i] = (value shr 38).toByte()
data[++i] = (value shr 46).toByte()
data[++i] = (value shr 54).toByte()
data[++i] = (value shr 62).toByte()
}
}
}

/**
 * Computes how many bytes a FlexUInt encoding of [value] occupies: its bit length divided by seven and rounded
 * up, with a minimum of one byte (zero still takes one byte).
 */
@JvmStatic
fun flexUIntLength(value: BigInteger): Int = 1 + (value.bitLength() - 1) / 7

/**
 * Computes how many bytes a FlexInt encoding of [value] occupies: its bit length (which excludes the sign bit)
 * divided by seven, rounded down, plus one.
 */
@JvmStatic
fun flexIntLength(value: BigInteger): Int = 1 + value.bitLength() / 7

/**
 * Encodes [value] as a FlexInt or FlexUInt of [numBytes] bytes, writing into [data] beginning at [offset].
 * Obtain [numBytes] from [flexIntLength] or [flexUIntLength].
 *
 * The output consists of zero or more all-zero bytes, then a byte holding the marker bit plus the lowest value
 * bits, then the rest of the value from least-significant to most-significant. If one byte is still unwritten
 * after all bytes of the two's-complement representation have been consumed, it is filled from the top byte
 * using a sign-propagating shift.
 */
@JvmStatic
fun writeFlexIntOrUIntInto(data: ByteArray, offset: Int, value: BigInteger, numBytes: Int) {
    // TODO: Should we branch to the implementation for long if the number is small enough?
    // https://github.com/amazon-ion/ion-java/issues/614
    val bigEndian = value.toByteArray()
    val lastIndex = bigEndian.size - 1
    var cursor = offset // Always points at the next byte to be written.

    // One all-zero byte for every complete group of eight length bits:
    // 1-8 total bytes need none, 9-16 need one, 17-24 need two, and so on.
    repeat((numBytes - 1) / 8) {
        data[cursor++] = 0
    }

    // The remaining length bits end in a single set marker bit; value bits start right above it.
    val markerPosition = (numBytes - 1) % 8
    val shift = markerPosition + 1
    val unshift = 8 - shift
    data[cursor++] = ((bigEndian[lastIndex].toInt() shl shift) or (1 shl markerPosition)).toByte()

    // Each output byte combines the top bits of one source byte with the bottom bits of the next-higher one.
    for (k in lastIndex downTo 1) {
        val fromHigherByte = bigEndian[k - 1].toInt() shl shift
        val fromLowerByte = (bigEndian[k].toInt() and 0xFF) shr unshift
        data[cursor++] = (fromHigherByte or fromLowerByte).toByte()
    }

    // Whatever is left of the most significant source byte, sign-extended.
    if (cursor - offset < numBytes) {
        data[cursor] = (bigEndian[0].toInt() shr unshift).toByte()
    }
}
}
6 changes: 3 additions & 3 deletions src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ public static int writeDecimalValue(WriteBuffer buffer, final BigDecimal value)
}

int exponent = -value.scale();
int numExponentBytes = WriteBuffer.flexIntLength(exponent);
int numExponentBytes = FlexInt.flexIntLength(exponent);

byte[] coefficientBytes = null;
int numCoefficientBytes;
Expand Down Expand Up @@ -419,7 +419,7 @@ static int writeLongFormTimestampValue(WriteBuffer buffer, Timestamp value) {
BigDecimal fractionalSeconds = value.getZFractionalSecond();

long exponent = fractionalSeconds.scale();
int numExponentBytes = WriteBuffer.flexUIntLength(exponent);
int numExponentBytes = FlexInt.flexUIntLength(exponent);

BigInteger coefficient = fractionalSeconds.unscaledValue();
byte[] coefficientBytes = null;
Expand Down Expand Up @@ -556,7 +556,7 @@ public static int writeAnnotations(WriteBuffer buffer, long[] annotations) {
} else {
int numAddressBytes = 0;
for (long ann : annotations) {
numAddressBytes += WriteBuffer.flexUIntLength(ann);
numAddressBytes += FlexInt.flexUIntLength(ann);
}
buffer.writeByte(OpCodes.ANNOTATIONS_MANY_SYMBOL_ADDRESS);
int numLengthBytes = buffer.writeFlexUInt(numAddressBytes);
Expand Down
34 changes: 0 additions & 34 deletions src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@
import java.math.BigInteger;
import java.util.Iterator;
import java.util.ListIterator;
import java.util.NoSuchElementException;

/**
* Low-level binary {@link IonWriter} that understands encoding concerns but doesn't operate with any sense of symbol table management.
Expand Down Expand Up @@ -296,39 +295,6 @@ public String toString()
}
}

/**
 * Mutable record of a span of data that is being patched out, together with the length value associated with it.
 * Every field holds -1 while the patch point is in its cleared state.
 */
private static class PatchPoint
{
    /** position of the data being patched out. */
    public long oldPosition;
    /** length of the data being patched out.*/
    public int oldLength;
    /** size of the container data or annotations.*/
    public long length;

    /** Creates a patch point in the cleared state (every field set to -1). */
    public PatchPoint()
    {
        oldPosition = -1;
        oldLength = -1;
        length = -1;
    }

    /** Renders the three fields for debugging. */
    @Override
    public String toString()
    {
        // The final ")" closes the leading "(PP"; it was previously missing, leaving the parentheses unbalanced.
        return "(PP old::(" + oldPosition + " " + oldLength + ") patch::(" + length + "))";
    }

    /**
     * Overwrites all three fields.
     * @param oldPosition position of the data being patched out.
     * @param oldLength length of the data being patched out.
     * @param length size of the container data or annotations.
     * @return this instance, to allow chaining.
     */
    public PatchPoint initialize(final long oldPosition, final int oldLength, final long length) {
        this.oldPosition = oldPosition;
        this.oldLength = oldLength;
        this.length = length;
        return this;
    }

    /**
     * Resets every field to -1.
     * @return this instance, to allow chaining.
     */
    public PatchPoint clear() {
        return initialize(-1, -1, -1);
    }
}

/*package*/ enum StreamCloseMode
{
NO_CLOSE,
Expand Down
Loading
Loading