diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 0000000..78ec220
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,19 @@
+name: ci
+
+on:
+  - push
+  - pull_request
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-java@v3
+        with:
+          distribution: 'temurin'
+          java-version: 8
+      - name: Maven Install
+        run: mvn install -B -V -DskipTests -Dair.check.skip-all
+      - name: Maven Tests
+        run: mvn install -B -P ci
diff --git a/README.md b/README.md
index 2a5b49c..60cb7c6 100644
--- a/README.md
+++ b/README.md
@@ -1,219 +1,3 @@
# 🚚 MOVED 🚚
### __Future development of Snappy without JNI has moved to [aircompressor](https://github.com/airlift/aircompressor)__
-
-
-
-# Snappy in Java
-
-This is a rewrite (port) of [Snappy](http://code.google.com/p/snappy/) written in
-pure Java. This compression code produces a byte-for-byte exact copy of the output
-created by the original C++ code, and extremely fast.
-
-# Performance
-
-The Snappy micro-benchmark has been ported, and can be used to measure
-the performance of this code against the excellent Snappy JNI wrapper from
-[xerial](http://code.google.com/p/snappy-java/). As you can see in the results
-below, the pure Java port is 20-30% faster for block compress, 0-10% slower
-for block uncompress, and 0-5% slower for round-trip block compression. These
-results were run with Java 7 on a Core i7, 64-bit Mac.
-
-As a second more independent test, the performance has been measured using the
-Ning JVM compression benchmark against Snappy JNI, and the pure Java
-[Ning LZF](https://github.com/ning/compress) codec. The
-[results](http://dain.github.com/snappy/) show that the pure Java Snappy is
-20-30% faster than JNI Snappy for compression, and is typically 10-20% slower
-for decompression. Both, the pure Java Snappy and JNI Snappy implementations
-are faster that the Ning LZF codec. These results were run with Java 6 on a
-Core i7, 64-bit Mac.
-
-The difference in performance between these two tests is due to the difference
-in JVM version; Java 7 is consistently 5-10% faster than Java 6 in the
-compression code. As with all benchmarks your mileage will vary, so test with
-your actual use case.
-
-
-
-### Block Compress
-
-
-# Stream Format
-
-There is no defined stream format for Snappy, but there is an effort to create
-a common format with the Google Snappy project.
-
-The stream format used in this library has a couple of unique features not
-found in the other Snappy stream formats. Like the other formats, the user
-input is broken into blocks and each block is compressed. If the compressed
-block is smaller that the user input, the compressed block is written,
-otherwise the uncompressed original is written. This dramatically improves the
-speed of uncompressible input such as JPG images. Additionally, a checksum of
-the user input data for each block is written to the stream. This safety check
-assures that the stream has not been corrupted in transit or by a bad Snappy
-implementation. Finally, like gzip, compressed Snappy files can be
-concatenated together without issue, since the input stream will ignore a
-Snappy stream header in the middle of a stream. This makes combining files in
-Hadoop and S3 trivial.
-
-The the SnappyOutputStream javadocs contain formal definition of the stream
-format.
-
-## Stream Performance
-
-The streaming mode performance can not be directly compared to other
-compression algorithms since most formats do not contain a checksum. The basic
-streaming code is significantly faster that the Snappy JNI library due to
-the completely unoptimized stream implementation in Snappy JNI, but once the
-check sum is enabled the performance drops off by about 20%.
-
-### Stream Compress (no checksums)
-
diff --git a/pom.xml b/pom.xml
index feb27ca..f7e9d2c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -62,6 +62,11 @@
true
+
+ maven_central
+ Maven Central
+ https://repo.maven.apache.org/maven2/
+
@@ -78,29 +83,36 @@
+
org.apache.hadoop
- hadoop-core
- 0.20.2
+ hadoop-common
+ 3.4.0trueprovidedcom.google.guavaguava
- 13.0.1
+ 33.2.0-jretestorg.xerial.snappysnappy-java
- 1.0.4.1
+ 1.1.10.4testorg.testngtestng
- 6.0.1
+ 7.5.1test
@@ -110,7 +122,7 @@
org.apache.maven.pluginsmaven-enforcer-plugin
- 1.0
+ 3.4.1enforce-versions
@@ -123,7 +135,7 @@
3.0.0
- 1.6
+ 1.8
@@ -134,47 +146,6 @@
org.apache.maven.pluginsmaven-source-plugin
-
-
- org.apache.maven.plugins
- maven-jar-plugin
- 2.3.2
-
-
- binary
- package
-
- jar
-
-
- bin
-
-
- org.iq80.snappy.Main
-
-
-
-
-
-
-
-
- org.skife.maven
- really-executable-jar-maven-plugin
- 1.0.3
-
-
- package
-
- really-executable-jar
-
-
- bin
-
-
-
-
-
@@ -212,8 +183,8 @@
maven-compiler-plugin2.3.2
-
- 1.6
+
+ 1.8
diff --git a/src/main/java/org/iq80/snappy/AbstractSnappyInputStream.java b/src/main/java/org/iq80/snappy/AbstractSnappyInputStream.java
deleted file mode 100644
index 322bbde..0000000
--- a/src/main/java/org/iq80/snappy/AbstractSnappyInputStream.java
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Copyright (C) 2011 the original author or authors.
- * See the notice.md file distributed with this work for additional
- * information regarding copyright ownership.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.iq80.snappy;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-
-import static java.lang.Math.min;
-import static org.iq80.snappy.SnappyInternalUtils.checkNotNull;
-import static org.iq80.snappy.SnappyInternalUtils.checkPositionIndexes;
-import static org.iq80.snappy.SnappyInternalUtils.readBytes;
-
-/**
- * A common base class for frame based snappy input streams.
- */
-abstract class AbstractSnappyInputStream
- extends InputStream
-{
- private final InputStream in;
- private final byte[] frameHeader;
- private final boolean verifyChecksums;
- private final BufferRecycler recycler;
-
- /**
- * A single frame read from the underlying {@link InputStream}.
- */
- private byte[] input;
-
- /**
- * The decompressed data from {@link #input}.
- */
- private byte[] uncompressed;
-
- /**
- * Indicates if this instance has been closed.
- */
- private boolean closed;
-
- /**
- * Indicates if we have reached the EOF on {@link #in}.
- */
- private boolean eof;
-
- /**
- * The position in {@link #input} to read to.
- */
- private int valid;
-
- /**
- * The next position to read from {@link #buffer}.
- */
- private int position;
-
- /**
- * Buffer is a reference to the real buffer of uncompressed data for the
- * current block: uncompressed if the block is compressed, or input if it is
- * not.
- */
- private byte[] buffer;
-
- /**
- * Creates a Snappy input stream to read data from the specified underlying
- * input stream.
- *
- * @param in the underlying input stream
- * @param verifyChecksums if true, checksums in input stream will be verified
- * @param expectedHeader the expected stream header
- */
- public AbstractSnappyInputStream(InputStream in, int maxBlockSize, int frameHeaderSize, boolean verifyChecksums, byte[] expectedHeader)
- throws IOException
- {
- this.in = in;
- this.verifyChecksums = verifyChecksums;
- this.recycler = BufferRecycler.instance();
- allocateBuffersBasedOnSize(maxBlockSize + 5);
- this.frameHeader = new byte[frameHeaderSize];
-
- // stream must begin with stream header
- byte[] actualHeader = new byte[expectedHeader.length];
-
- int read = readBytes(in, actualHeader, 0, actualHeader.length);
- if (read < expectedHeader.length) {
- throw new EOFException("encountered EOF while reading stream header");
- }
- if (!Arrays.equals(expectedHeader, actualHeader)) {
- throw new IOException("invalid stream header");
- }
- }
-
- private void allocateBuffersBasedOnSize(int size)
- {
- input = recycler.allocInputBuffer(size);
- uncompressed = recycler.allocDecodeBuffer(size);
- }
-
- @Override
- public int read()
- throws IOException
- {
- if (closed) {
- return -1;
- }
- if (!ensureBuffer()) {
- return -1;
- }
- return buffer[position++] & 0xFF;
- }
-
- @Override
- public int read(byte[] output, int offset, int length)
- throws IOException
- {
- checkNotNull(output, "output is null");
- checkPositionIndexes(offset, offset + length, output.length);
- if (closed) {
- throw new IOException("Stream is closed");
- }
-
- if (length == 0) {
- return 0;
- }
- if (!ensureBuffer()) {
- return -1;
- }
-
- int size = min(length, available());
- System.arraycopy(buffer, position, output, offset, size);
- position += size;
- return size;
- }
-
- @Override
- public int available()
- throws IOException
- {
- if (closed) {
- return 0;
- }
- return valid - position;
- }
-
- @Override
- public void close()
- throws IOException
- {
- try {
- in.close();
- }
- finally {
- if (!closed) {
- closed = true;
- recycler.releaseInputBuffer(input);
- recycler.releaseDecodeBuffer(uncompressed);
- }
- }
- }
-
- enum FrameAction
- {
- RAW, SKIP, UNCOMPRESS
- }
-
- public static final class FrameMetaData
- {
- final int length;
- final FrameAction frameAction;
-
- /**
- * @param frameAction
- * @param length
- */
- public FrameMetaData(FrameAction frameAction, int length)
- {
- this.frameAction = frameAction;
- this.length = length;
- }
- }
-
- public static final class FrameData
- {
- final int checkSum;
- final int offset;
-
- public FrameData(int checkSum, int offset)
- {
- this.checkSum = checkSum;
- this.offset = offset;
- }
- }
-
- private boolean ensureBuffer()
- throws IOException
- {
- if (available() > 0) {
- return true;
- }
- if (eof) {
- return false;
- }
-
- if (!readBlockHeader()) {
- eof = true;
- return false;
- }
-
- // get action based on header
- FrameMetaData frameMetaData = getFrameMetaData(frameHeader);
-
- if (FrameAction.SKIP == frameMetaData.frameAction) {
- SnappyInternalUtils.skip(in, frameMetaData.length);
- return ensureBuffer();
- }
-
- if (frameMetaData.length > input.length) {
- allocateBuffersBasedOnSize(frameMetaData.length);
- }
-
- int actualRead = readBytes(in, input, 0, frameMetaData.length);
- if (actualRead != frameMetaData.length) {
- throw new EOFException("unexpectd EOF when reading frame");
- }
-
- FrameData frameData = getFrameData(frameHeader, input, actualRead);
-
- if (FrameAction.UNCOMPRESS == frameMetaData.frameAction) {
- int uncompressedLength = Snappy.getUncompressedLength(input,
- frameData.offset);
-
- if (uncompressedLength > uncompressed.length) {
- uncompressed = recycler.allocDecodeBuffer(uncompressedLength);
- }
-
- this.valid = Snappy.uncompress(input, frameData.offset, actualRead
- - frameData.offset, uncompressed, 0);
- this.buffer = uncompressed;
- this.position = 0;
- }
- else {
- // we need to start reading at the offset
- this.position = frameData.offset;
- this.buffer = input;
- // valid is until the end of the read data, regardless of offset
- // indicating where we start
- this.valid = actualRead;
- }
-
- if (verifyChecksums) {
- int actualCrc32c = Crc32C.maskedCrc32c(buffer, position, valid - position);
- if (frameData.checkSum != actualCrc32c) {
- throw new IOException("Corrupt input: invalid checksum");
- }
- }
-
- return true;
- }
-
- /**
- * Use the content of the frameHeader to describe what type of frame we have
- * and the action to take.
- */
- protected abstract FrameMetaData getFrameMetaData(byte[] frameHeader)
- throws IOException;
-
- /**
- * Take the frame header and the content of the frame to describe metadata
- * about the content.
- *
- * @param frameHeader The frame header.
- * @param content The content of the of the frame. Content begins at index {@code 0}.
- * @param length The length of the content.
- * @return Metadata about the content of the frame.
- */
- protected abstract FrameData getFrameData(byte[] frameHeader, byte[] content, int length);
-
- private boolean readBlockHeader()
- throws IOException
- {
- int read = readBytes(in, frameHeader, 0, frameHeader.length);
-
- if (read == -1) {
- return false;
- }
-
- if (read < frameHeader.length) {
- throw new EOFException("encountered EOF while reading block header");
- }
-
- return true;
- }
-}
diff --git a/src/main/java/org/iq80/snappy/AbstractSnappyOutputStream.java b/src/main/java/org/iq80/snappy/AbstractSnappyOutputStream.java
deleted file mode 100644
index abaf3fa..0000000
--- a/src/main/java/org/iq80/snappy/AbstractSnappyOutputStream.java
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Copyright (C) 2011 the original author or authors.
- * See the notice.md file distributed with this work for additional
- * information regarding copyright ownership.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.iq80.snappy;
-
-import java.io.IOException;
-import java.io.OutputStream;
-
-import static org.iq80.snappy.Crc32C.maskedCrc32c;
-import static org.iq80.snappy.Snappy.maxCompressedLength;
-import static org.iq80.snappy.SnappyInternalUtils.checkArgument;
-import static org.iq80.snappy.SnappyInternalUtils.checkNotNull;
-import static org.iq80.snappy.SnappyInternalUtils.checkPositionIndexes;
-
-/**
- * This is a base class supporting both the {@link SnappyOutputStream} and
- * {@link SnappyFramedOutputStream}.
- *
- *
- * Delegates writing the header bytes and individual frames to the specific
- * implementations. Implementations may also override the crc32 checksum
- * calculation.
- *
- *
- * @since 0.4
- */
-abstract class AbstractSnappyOutputStream
- extends OutputStream
-{
- private final BufferRecycler recycler;
- private final int blockSize;
- private final byte[] buffer;
- private final byte[] outputBuffer;
- private final double minCompressionRatio;
-
- private final OutputStream out;
-
- private int position;
- private boolean closed;
-
- /**
- * @param out The underlying {@link OutputStream} to write to. Must not be {@code null}.
- * @param blockSize The block size (of raw data) to compress before writing frames to out.
- * @param minCompressionRatio Defines the minimum compression ratio ({@code compressedLength / rawLength}) that must be achieved to
- * write the compressed data. This must be in (0, 1.0].
- */
- public AbstractSnappyOutputStream(OutputStream out, int blockSize, double minCompressionRatio)
- throws IOException
- {
- this.out = checkNotNull(out, "out is null");
- checkArgument(minCompressionRatio > 0 && minCompressionRatio <= 1.0, "minCompressionRatio %1s must be between (0,1.0].", minCompressionRatio);
- this.minCompressionRatio = minCompressionRatio;
- this.recycler = BufferRecycler.instance();
- this.blockSize = blockSize;
- this.buffer = recycler.allocOutputBuffer(blockSize);
- this.outputBuffer = recycler.allocEncodingBuffer(maxCompressedLength(blockSize));
-
- writeHeader(out);
- }
-
- /**
- * Writes the implementation specific header or "marker bytes" to
- * out.
- *
- * @param out The underlying {@link OutputStream}.
- */
- protected abstract void writeHeader(OutputStream out)
- throws IOException;
-
- @Override
- public void write(int b)
- throws IOException
- {
- if (closed) {
- throw new IOException("Stream is closed");
- }
- if (position >= blockSize) {
- flushBuffer();
- }
- buffer[position++] = (byte) b;
- }
-
- @Override
- public void write(byte[] input, int offset, int length)
- throws IOException
- {
- checkNotNull(input, "input is null");
- checkPositionIndexes(offset, offset + length, input.length);
- if (closed) {
- throw new IOException("Stream is closed");
- }
-
- int free = blockSize - position;
-
- // easy case: enough free space in buffer for entire input
- if (free >= length) {
- copyToBuffer(input, offset, length);
- return;
- }
-
- // fill partial buffer as much as possible and flush
- if (position > 0) {
- copyToBuffer(input, offset, free);
- flushBuffer();
- offset += free;
- length -= free;
- }
-
- // write remaining full blocks directly from input array
- while (length >= blockSize) {
- writeCompressed(input, offset, blockSize);
- offset += blockSize;
- length -= blockSize;
- }
-
- // copy remaining partial block into now-empty buffer
- copyToBuffer(input, offset, length);
- }
-
- @Override
- public final void flush()
- throws IOException
- {
- if (closed) {
- throw new IOException("Stream is closed");
- }
- flushBuffer();
- out.flush();
- }
-
- @Override
- public final void close()
- throws IOException
- {
- if (closed) {
- return;
- }
- try {
- flush();
- out.close();
- }
- finally {
- closed = true;
- recycler.releaseOutputBuffer(outputBuffer);
- recycler.releaseEncodeBuffer(buffer);
- }
- }
-
- private void copyToBuffer(byte[] input, int offset, int length)
- {
- System.arraycopy(input, offset, buffer, position, length);
- position += length;
- }
-
- /**
- * Compresses and writes out any buffered data. This does nothing if there
- * is no currently buffered data.
- */
- private void flushBuffer()
- throws IOException
- {
- if (position > 0) {
- writeCompressed(buffer, 0, position);
- position = 0;
- }
- }
-
- /**
- * {@link #calculateCRC32C(byte[], int, int) Calculates} the crc, compresses
- * the data, determines if the compression ratio is acceptable and calls
- * {@link #writeBlock(OutputStream, byte[], int, int, boolean, int)} to
- * actually write the frame.
- *
- * @param input The byte[] containing the raw data to be compressed.
- * @param offset The offset into input where the data starts.
- * @param length The amount of data in input.
- */
- private void writeCompressed(byte[] input, int offset, int length)
- throws IOException
- {
- // crc is based on the user supplied input data
- int crc32c = calculateCRC32C(input, offset, length);
-
- int compressed = Snappy
- .compress(input, offset, length, outputBuffer, 0);
-
- // only use the compressed data if compression ratio is <= the minCompressionRatio
- if (((double) compressed / (double) length) <= minCompressionRatio) {
- writeBlock(out, outputBuffer, 0, compressed, true, crc32c);
- }
- else {
- // otherwise use the uncompressed data.
- writeBlock(out, input, offset, length, false, crc32c);
- }
- }
-
- /**
- * Calculates a CRC32C checksum over the data.
- *
- * This can be overridden to provider alternative implementations (such as
- * returning 0 if checksums are not desired).
- *
- *
- * @return The CRC32 checksum.
- */
- protected int calculateCRC32C(byte[] data, int offset, int length)
- {
- return maskedCrc32c(data, offset, length);
- }
-
- /**
- * Write a frame (block) to out.
- *
- * @param out The {@link OutputStream} to write to.
- * @param data The data to write.
- * @param offset The offset in data to start at.
- * @param length The length of data to use.
- * @param compressed Indicates if data is the compressed or raw content.
- * This is based on whether the compression ratio desired is
- * reached.
- * @param crc32c The calculated checksum.
- */
- protected abstract void writeBlock(OutputStream out, byte[] data, int offset, int length, boolean compressed, int crc32c)
- throws IOException;
-}
diff --git a/src/main/java/org/iq80/snappy/BufferRecycler.java b/src/main/java/org/iq80/snappy/BufferRecycler.java
deleted file mode 100644
index 6dfcc0d..0000000
--- a/src/main/java/org/iq80/snappy/BufferRecycler.java
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (C) 2011 the original author or authors.
- * See the notice.md file distributed with this work for additional
- * information regarding copyright ownership.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.iq80.snappy;
-
-import java.lang.ref.SoftReference;
-
-/**
- * Simple helper class to encapsulate details of basic buffer
- * recycling scheme, which helps a lot (as per profiling) for
- * smaller encoding cases.
- *
- * @author tatu
- */
-class BufferRecycler
-{
- private static final int MIN_ENCODING_BUFFER = 4000;
-
- private static final int MIN_OUTPUT_BUFFER = 8000;
-
- /**
- * This ThreadLocal contains a {@link java.lang.ref.SoftReference}
- * to a {@link BufferRecycler} used to provide a low-cost
- * buffer recycling for buffers we need for encoding, decoding.
- */
- protected static final ThreadLocal> recyclerRef = new ThreadLocal>();
-
- private byte[] inputBuffer;
- private byte[] outputBuffer;
-
- private byte[] decodingBuffer;
- private byte[] encodingBuffer;
-
- private short[] encodingHash;
-
- /**
- * Accessor to get thread-local recycler instance
- */
- public static BufferRecycler instance()
- {
- SoftReference ref = recyclerRef.get();
-
- BufferRecycler bufferRecycler;
- if (ref == null) {
- bufferRecycler = null;
- }
- else {
- bufferRecycler = ref.get();
- }
-
- if (bufferRecycler == null) {
- bufferRecycler = new BufferRecycler();
- recyclerRef.set(new SoftReference(bufferRecycler));
- }
- return bufferRecycler;
- }
-
- public void clear()
- {
- inputBuffer = null;
- outputBuffer = null;
- decodingBuffer = null;
- encodingBuffer = null;
- encodingHash = null;
- }
-
- ///////////////////////////////////////////////////////////////////////
- // Buffers for encoding (output)
- ///////////////////////////////////////////////////////////////////////
-
- public byte[] allocEncodingBuffer(int minSize)
- {
- byte[] buf = encodingBuffer;
- if (buf == null || buf.length < minSize) {
- buf = new byte[Math.max(minSize, MIN_ENCODING_BUFFER)];
- }
- else {
- encodingBuffer = null;
- }
- return buf;
- }
-
- public void releaseEncodeBuffer(byte[] buffer)
- {
- if (encodingBuffer == null || buffer.length > encodingBuffer.length) {
- encodingBuffer = buffer;
- }
- }
-
- public byte[] allocOutputBuffer(int minSize)
- {
- byte[] buf = outputBuffer;
- if (buf == null || buf.length < minSize) {
- buf = new byte[Math.max(minSize, MIN_OUTPUT_BUFFER)];
- }
- else {
- outputBuffer = null;
- }
- return buf;
- }
-
- public void releaseOutputBuffer(byte[] buffer)
- {
- if (outputBuffer == null || (buffer != null && buffer.length > outputBuffer.length)) {
- outputBuffer = buffer;
- }
- }
-
- public short[] allocEncodingHash(int suggestedSize)
- {
- short[] buf = encodingHash;
- if (buf == null || buf.length < suggestedSize) {
- buf = new short[suggestedSize];
- }
- else {
- encodingHash = null;
- }
- return buf;
- }
-
- public void releaseEncodingHash(short[] buffer)
- {
- if (encodingHash == null || (buffer != null && buffer.length > encodingHash.length)) {
- encodingHash = buffer;
- }
- }
-
- ///////////////////////////////////////////////////////////////////////
- // Buffers for decoding (input)
- ///////////////////////////////////////////////////////////////////////
-
- public byte[] allocInputBuffer(int minSize)
- {
- byte[] buf = inputBuffer;
- if (buf == null || buf.length < minSize) {
- buf = new byte[Math.max(minSize, MIN_OUTPUT_BUFFER)];
- }
- else {
- inputBuffer = null;
- }
- return buf;
- }
-
- public void releaseInputBuffer(byte[] buffer)
- {
- if (inputBuffer == null || (buffer != null && buffer.length > inputBuffer.length)) {
- inputBuffer = buffer;
- }
- }
-
- public byte[] allocDecodeBuffer(int size)
- {
- byte[] buf = decodingBuffer;
- if (buf == null || buf.length < size) {
- buf = new byte[size];
- }
- else {
- decodingBuffer = null;
- }
- return buf;
- }
-
- public void releaseDecodeBuffer(byte[] buffer)
- {
- if (decodingBuffer == null || (buffer != null && buffer.length > decodingBuffer.length)) {
- decodingBuffer = buffer;
- }
- }
-}
diff --git a/src/main/java/org/iq80/snappy/CorruptionException.java b/src/main/java/org/iq80/snappy/CorruptionException.java
index 24e797f..d91a637 100644
--- a/src/main/java/org/iq80/snappy/CorruptionException.java
+++ b/src/main/java/org/iq80/snappy/CorruptionException.java
@@ -1,8 +1,4 @@
/*
- * Copyright (C) 2011 the original author or authors.
- * See the notice.md file distributed with this work for additional
- * information regarding copyright ownership.
- *
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -20,22 +16,21 @@
public class CorruptionException
extends RuntimeException
{
- public CorruptionException()
- {
- }
+ private final long offset;
- public CorruptionException(String message)
+ public CorruptionException(long offset)
{
- super(message);
+ this(offset, "Malformed input");
}
- public CorruptionException(String message, Throwable cause)
+ public CorruptionException(long offset, String reason)
{
- super(message, cause);
+ super(reason + ": offset=" + offset);
+ this.offset = offset;
}
- public CorruptionException(Throwable cause)
+ public long getOffset()
{
- super(cause);
+ return offset;
}
}
diff --git a/src/main/java/org/iq80/snappy/Crc32C.java b/src/main/java/org/iq80/snappy/Crc32C.java
index cced9b0..9679946 100644
--- a/src/main/java/org/iq80/snappy/Crc32C.java
+++ b/src/main/java/org/iq80/snappy/Crc32C.java
@@ -47,24 +47,15 @@ public static int maskedCrc32c(byte[] data, int offset, int length)
* Return a masked representation of crc.
*
* Motivation: it is problematic to compute the CRC of a string that
- * contains embedded CRCs. Therefore we recommend that CRCs stored
+ * contains embedded CRCs. Therefore, we recommend that CRCs stored
* somewhere (e.g., in files) should be masked before being stored.
*/
- public static int mask(int crc)
+ private static int mask(int crc)
{
// Rotate right by 15 bits and add a constant.
return ((crc >>> 15) | (crc << 17)) + MASK_DELTA;
}
- /**
- * Return the crc whose masked representation is masked_crc.
- */
- public static int unmask(int maskedCrc)
- {
- int rot = maskedCrc - MASK_DELTA;
- return ((rot >>> 17) | (rot << 15));
- }
-
/**
* the current CRC value, bit-flipped
*/
@@ -73,17 +64,17 @@ public static int unmask(int maskedCrc)
/**
* Create a new PureJavaCrc32 object.
*/
- public Crc32C()
+ private Crc32C()
{
reset();
}
- public int getMaskedValue()
+ private int getMaskedValue()
{
return mask(getIntValue());
}
- public int getIntValue()
+ private int getIntValue()
{
return ~crc;
}
@@ -107,9 +98,13 @@ public void update(byte[] b, int off, int len)
int localCrc = crc;
while (len > 7) {
int c0 = b[off++] ^ localCrc;
- int c1 = b[off++] ^ (localCrc >>>= 8);
- int c2 = b[off++] ^ (localCrc >>>= 8);
- int c3 = b[off++] ^ (localCrc >>>= 8);
+ localCrc >>>= 8;
+ int c1 = b[off++] ^ localCrc;
+ localCrc >>>= 8;
+ int c2 = b[off++] ^ localCrc;
+ localCrc >>>= 8;
+ int c3 = b[off++] ^ localCrc;
+
localCrc = (T8_7[c0 & 0xff] ^ T8_6[c1 & 0xff])
^ (T8_5[c2 & 0xff] ^ T8_4[c3 & 0xff]);
@@ -137,7 +132,7 @@ public void update(int b)
// java -cp build/test/classes/:build/classes/ \
// org.apache.hadoop.util.TestPureJavaCrc32\$Table 82F63B78
- static final int[] T8_0 = new int[] {
+ private static final int[] T8_0 = new int[] {
0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
@@ -203,7 +198,7 @@ public void update(int b)
0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
};
- static final int[] T8_1 = new int[] {
+ private static final int[] T8_1 = new int[] {
0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899,
0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945,
0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21,
@@ -269,7 +264,7 @@ public void update(int b)
0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F,
0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483
};
- static final int[] T8_2 = new int[] {
+ private static final int[] T8_2 = new int[] {
0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073,
0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469,
0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6,
@@ -335,7 +330,7 @@ public void update(int b)
0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2,
0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8
};
- static final int[] T8_3 = new int[] {
+ private static final int[] T8_3 = new int[] {
0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939,
0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA,
0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF,
@@ -401,7 +396,7 @@ public void update(int b)
0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1,
0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842
};
- static final int[] T8_4 = new int[] {
+ private static final int[] T8_4 = new int[] {
0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4,
0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44,
0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65,
@@ -467,7 +462,7 @@ public void update(int b)
0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013,
0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3
};
- static final int[] T8_5 = new int[] {
+ private static final int[] T8_5 = new int[] {
0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA,
0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD,
0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5,
@@ -533,7 +528,7 @@ public void update(int b)
0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B,
0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C
};
- static final int[] T8_6 = new int[] {
+ private static final int[] T8_6 = new int[] {
0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558,
0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089,
0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B,
@@ -599,7 +594,7 @@ public void update(int b)
0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E,
0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F
};
- static final int[] T8_7 = new int[] {
+ private static final int[] T8_7 = new int[] {
0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769,
0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504,
0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3,
diff --git a/src/main/java/org/iq80/snappy/HadoopSnappyCodec.java b/src/main/java/org/iq80/snappy/HadoopSnappyCodec.java
index 7ec8b81..aa942e5 100644
--- a/src/main/java/org/iq80/snappy/HadoopSnappyCodec.java
+++ b/src/main/java/org/iq80/snappy/HadoopSnappyCodec.java
@@ -17,69 +17,95 @@
*/
package org.iq80.snappy;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;
+import org.apache.hadoop.io.compress.DoNotPool;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import static org.apache.hadoop.fs.CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT;
+import static org.apache.hadoop.fs.CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY;
+
public class HadoopSnappyCodec
- implements CompressionCodec
+ implements Configurable, CompressionCodec
{
+ private Configuration conf;
+
+ @Override
+ public Configuration getConf()
+ {
+ return conf;
+ }
+
+ @Override
+ public void setConf(Configuration conf)
+ {
+ this.conf = conf;
+ }
+
@Override
public CompressionOutputStream createOutputStream(OutputStream outputStream)
throws IOException
{
- return new SnappyCompressionOutputStream(outputStream);
+ return new HadoopSnappyOutputStream(outputStream, getBufferSize());
}
@Override
public CompressionOutputStream createOutputStream(OutputStream outputStream, Compressor compressor)
throws IOException
{
- throw new UnsupportedOperationException("Snappy Compressor is not supported");
+ if (!(compressor instanceof HadoopSnappyCompressor)) { // only this codec's own placeholder compressor is accepted
+ throw new IllegalArgumentException("Compressor is not the Snappy compressor");
+ }
+ return new HadoopSnappyOutputStream(outputStream, getBufferSize());
}
@Override
public Class extends Compressor> getCompressorType()
{
- throw new UnsupportedOperationException("Snappy Compressor is not supported");
+ return HadoopSnappyCompressor.class;
}
@Override
public Compressor createCompressor()
{
- throw new UnsupportedOperationException("Snappy Compressor is not supported");
+ return new HadoopSnappyCompressor();
}
@Override
public CompressionInputStream createInputStream(InputStream inputStream)
throws IOException
{
- return new SnappyCompressionInputStream(inputStream);
+ return new HadoopSnappyInputStream(inputStream);
}
@Override
- public CompressionInputStream createInputStream(InputStream inputStream, Decompressor decompressor)
+ public CompressionInputStream createInputStream(InputStream in, Decompressor decompressor)
throws IOException
{
- throw new UnsupportedOperationException("Snappy Decompressor is not supported");
+ if (!(decompressor instanceof HadoopSnappyDecompressor)) {
+ throw new IllegalArgumentException("Decompressor is not the Snappy decompressor");
+ }
+ return new HadoopSnappyInputStream(in);
}
@Override
public Class extends Decompressor> getDecompressorType()
{
- throw new UnsupportedOperationException("Snappy Decompressor is not supported");
+ return HadoopSnappyDecompressor.class;
}
@Override
public Decompressor createDecompressor()
{
- throw new UnsupportedOperationException("Snappy Decompressor is not supported");
+ return new HadoopSnappyDecompressor();
}
@Override
@@ -88,72 +114,141 @@ public String getDefaultExtension()
return ".snappy";
}
- private static class SnappyCompressionOutputStream
- extends CompressionOutputStream
+ private int getBufferSize()
{
- public SnappyCompressionOutputStream(OutputStream outputStream)
- throws IOException
+ // Favor using the configured buffer size. This is not as critical for Snappy
+ // since Snappy always writes the compressed chunk size, so we always know the
+ // correct buffer size to create.
+ int maxUncompressedLength;
+ if (conf != null) {
+ maxUncompressedLength = conf.getInt(IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT);
+ }
+ else {
+ maxUncompressedLength = IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT;
+ }
+ return maxUncompressedLength;
+ }
+
+ /**
+ * No Hadoop code seems to actually use the compressor, so just return a dummy one so the createOutputStream method
+ * with a compressor can function. This interface can be implemented if needed.
+ */
+ @DoNotPool
+ private static class HadoopSnappyCompressor
+ implements Compressor
+ {
+ @Override
+ public void setInput(byte[] b, int off, int len)
{
- super(new SnappyOutputStream(outputStream));
+ throw new UnsupportedOperationException("Snappy block compressor is not supported");
}
@Override
- public void write(byte[] b, int off, int len)
- throws IOException
+ public boolean needsInput()
{
- out.write(b, off, len);
+ throw new UnsupportedOperationException("Snappy block compressor is not supported");
+ }
+
+ @Override
+ public void setDictionary(byte[] b, int off, int len)
+ {
+ throw new UnsupportedOperationException("Snappy block compressor is not supported");
+ }
+
+ @Override
+ public long getBytesRead()
+ {
+ throw new UnsupportedOperationException("Snappy block compressor is not supported");
+ }
+
+ @Override
+ public long getBytesWritten()
+ {
+ throw new UnsupportedOperationException("Snappy block compressor is not supported");
}
@Override
public void finish()
- throws IOException
{
- out.flush();
+ throw new UnsupportedOperationException("Snappy block compressor is not supported");
}
@Override
- public void resetState()
- throws IOException
+ public boolean finished()
{
- out.flush();
+ throw new UnsupportedOperationException("Snappy block compressor is not supported");
}
@Override
- public void write(int b)
- throws IOException
+ public int compress(byte[] b, int off, int len)
{
- out.write(b);
+ throw new UnsupportedOperationException("Snappy block compressor is not supported");
}
+
+ @Override
+ public void reset() {}
+
+ @Override
+ public void end() {}
+
+ @Override
+ public void reinit(Configuration conf) {}
}
- private static class SnappyCompressionInputStream
- extends CompressionInputStream
+ /**
+ * No Hadoop code seems to actually use the decompressor, so just return a dummy one so the createInputStream method
+ * with a decompressor can function. This interface can be implemented if needed.
+ */
+ @DoNotPool
+ private static class HadoopSnappyDecompressor
+ implements Decompressor
{
- public SnappyCompressionInputStream(InputStream inputStream)
- throws IOException
+ @Override
+ public void setInput(byte[] b, int off, int len)
+ {
+ throw new UnsupportedOperationException("Snappy block decompressor is not supported");
+ }
+
+ @Override
+ public boolean needsInput()
{
- super(new SnappyInputStream(inputStream));
+ throw new UnsupportedOperationException("Snappy block decompressor is not supported");
}
@Override
- public int read(byte[] b, int off, int len)
- throws IOException
+ public void setDictionary(byte[] b, int off, int len)
{
- return in.read(b, off, len);
+ throw new UnsupportedOperationException("Snappy block decompressor is not supported");
}
@Override
- public void resetState()
- throws IOException
+ public boolean needsDictionary()
{
- throw new UnsupportedOperationException("resetState not supported for Snappy");
+ throw new UnsupportedOperationException("Snappy block decompressor is not supported");
}
@Override
- public int read()
- throws IOException
+ public boolean finished()
{
- return in.read();
+ throw new UnsupportedOperationException("Snappy block decompressor is not supported");
}
+
+ @Override
+ public int decompress(byte[] b, int off, int len)
+ {
+ throw new UnsupportedOperationException("Snappy block decompressor is not supported");
+ }
+
+ @Override
+ public int getRemaining()
+ {
+ throw new UnsupportedOperationException("Snappy block decompressor is not supported");
+ }
+
+ @Override
+ public void reset() {}
+
+ @Override
+ public void end() {}
}
}
diff --git a/src/main/java/org/iq80/snappy/HadoopSnappyInputStream.java b/src/main/java/org/iq80/snappy/HadoopSnappyInputStream.java
new file mode 100644
index 0000000..c3e0706
--- /dev/null
+++ b/src/main/java/org/iq80/snappy/HadoopSnappyInputStream.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.iq80.snappy;
+
+import org.apache.hadoop.io.compress.CompressionInputStream;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+
+import static org.iq80.snappy.SnappyConstants.SIZE_OF_LONG;
+
+class HadoopSnappyInputStream
+ extends CompressionInputStream
+{
+ private final InputStream in;
+
+ private int uncompressedBlockLength;
+ private byte[] uncompressedChunk = new byte[0];
+ private int uncompressedChunkOffset;
+ private int uncompressedChunkLength;
+
+ private byte[] compressed = new byte[0];
+
+ public HadoopSnappyInputStream(InputStream in)
+ throws IOException
+ {
+ super(in);
+ this.in = in;
+ }
+
+ @Override
+ public int read()
+ throws IOException
+ {
+ if (uncompressedChunkOffset >= uncompressedChunkLength) {
+ readNextChunk(uncompressedChunk, 0, uncompressedChunk.length);
+ if (uncompressedChunkLength == 0) {
+ return -1;
+ }
+ }
+ return uncompressedChunk[uncompressedChunkOffset++] & 0xFF;
+ }
+
+ @Override
+ public int read(byte[] output, int offset, int length)
+ throws IOException
+ {
+ if (uncompressedChunkOffset >= uncompressedChunkLength) {
+ boolean directDecompress = readNextChunk(output, offset, length);
+ if (uncompressedChunkLength == 0) {
+ return -1;
+ }
+ if (directDecompress) {
+ uncompressedChunkOffset += uncompressedChunkLength;
+ return uncompressedChunkLength;
+ }
+ }
+ int size = Math.min(length, uncompressedChunkLength - uncompressedChunkOffset);
+ System.arraycopy(uncompressedChunk, uncompressedChunkOffset, output, offset, size);
+ uncompressedChunkOffset += size;
+ return size;
+ }
+
+ @Override
+ public void resetState()
+ {
+ uncompressedBlockLength = 0;
+ uncompressedChunkOffset = 0;
+ uncompressedChunkLength = 0;
+ }
+
+ private boolean readNextChunk(byte[] userBuffer, int userOffset, int userLength)
+ throws IOException
+ {
+ uncompressedBlockLength -= uncompressedChunkOffset;
+ uncompressedChunkOffset = 0;
+ uncompressedChunkLength = 0;
+ while (uncompressedBlockLength == 0) {
+ uncompressedBlockLength = readBigEndianInt();
+ if (uncompressedBlockLength == -1) {
+ uncompressedBlockLength = 0;
+ return false;
+ }
+ }
+
+ int compressedChunkLength = readBigEndianInt();
+ if (compressedChunkLength == -1) {
+ return false;
+ }
+
+ if (compressed.length < compressedChunkLength) {
+ // over allocate buffer which makes decompression easier
+ compressed = new byte[compressedChunkLength + SIZE_OF_LONG];
+ }
+ readInput(compressedChunkLength, compressed);
+
+ uncompressedChunkLength = Snappy.getUncompressedLength(compressed, 0);
+ if (uncompressedChunkLength > uncompressedBlockLength) {
+ throw new IOException("Chunk uncompressed size is greater than block size");
+ }
+
+ boolean directUncompress = true;
+ if (uncompressedChunkLength > userLength) {
+ if (uncompressedChunk.length < uncompressedChunkLength) {
+ // over allocate buffer which makes decompression easier
+ uncompressedChunk = new byte[uncompressedChunkLength + SIZE_OF_LONG];
+ }
+ directUncompress = false;
+ userBuffer = uncompressedChunk;
+ userOffset = 0;
+ userLength = uncompressedChunk.length;
+ }
+
+ int bytes = Snappy.uncompress(compressed, 0, compressedChunkLength, userBuffer, userOffset, userLength);
+ if (uncompressedChunkLength != bytes) {
+ throw new IOException("Expected to read " + uncompressedChunkLength + " bytes, but data only contained " + bytes + " bytes");
+ }
+ return directUncompress;
+ }
+
+ private void readInput(int length, byte[] buffer)
+ throws IOException
+ {
+ int offset = 0;
+ while (offset < length) {
+ int size = in.read(buffer, offset, length - offset);
+ if (size == -1) {
+ throw new EOFException("encountered EOF while reading block data");
+ }
+ offset += size;
+ }
+ }
+
+ private int readBigEndianInt()
+ throws IOException
+ {
+ int b1 = in.read();
+ if (b1 < 0) {
+ return -1;
+ }
+ int b2 = in.read();
+ int b3 = in.read();
+ int b4 = in.read();
+
+ // If any of the remaining bytes is negative (end of stream), the stream is truncated
+ if ((b2 | b3 | b4) < 0) {
+ throw new IOException("Stream is truncated");
+ }
+ return ((b1 << 24) + (b2 << 16) + (b3 << 8) + (b4));
+ }
+}
diff --git a/src/main/java/org/iq80/snappy/HadoopSnappyOutputStream.java b/src/main/java/org/iq80/snappy/HadoopSnappyOutputStream.java
new file mode 100644
index 0000000..9f3fdd3
--- /dev/null
+++ b/src/main/java/org/iq80/snappy/HadoopSnappyOutputStream.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.iq80.snappy;
+
+import org.apache.hadoop.io.compress.CompressionOutputStream;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import static org.iq80.snappy.SnappyConstants.SIZE_OF_LONG;
+
+class HadoopSnappyOutputStream
+ extends CompressionOutputStream
+{
+ private final byte[] inputBuffer;
+ private final int inputMaxSize;
+ private int inputOffset;
+
+ private final byte[] outputBuffer;
+
+ public HadoopSnappyOutputStream(OutputStream out, int bufferSize)
+ {
+ super(out);
+ inputBuffer = new byte[bufferSize];
+ // leave extra space free at end of buffers to make compression (slightly) faster
+ inputMaxSize = inputBuffer.length - compressionOverhead(bufferSize);
+ outputBuffer = new byte[Snappy.maxCompressedLength(inputMaxSize) + SIZE_OF_LONG];
+ }
+
+ @Override
+ public void write(int b)
+ throws IOException
+ {
+ inputBuffer[inputOffset++] = (byte) b;
+ if (inputOffset >= inputMaxSize) {
+ writeNextChunk(inputBuffer, 0, this.inputOffset);
+ }
+ }
+
+ @Override
+ public void write(byte[] buffer, int offset, int length)
+ throws IOException
+ {
+ while (length > 0) {
+ int chunkSize = Math.min(length, inputMaxSize - inputOffset);
+ // favor writing directly from the user buffer to avoid the extra copy
+ if (inputOffset == 0 && length > inputMaxSize) {
+ writeNextChunk(buffer, offset, chunkSize);
+ }
+ else {
+ System.arraycopy(buffer, offset, inputBuffer, inputOffset, chunkSize);
+ inputOffset += chunkSize;
+
+ if (inputOffset >= inputMaxSize) {
+ writeNextChunk(inputBuffer, 0, inputOffset);
+ }
+ }
+ length -= chunkSize;
+ offset += chunkSize;
+ }
+ }
+
+ @Override
+ public void finish()
+ throws IOException
+ {
+ if (inputOffset > 0) {
+ writeNextChunk(inputBuffer, 0, this.inputOffset);
+ }
+ }
+
+ @Override
+ public void resetState()
+ throws IOException
+ {
+ finish();
+ }
+
+ private void writeNextChunk(byte[] input, int inputOffset, int inputLength)
+ throws IOException
+ {
+ int compressedSize = Snappy.compress(input, inputOffset, inputLength, outputBuffer, 0);
+
+ writeBigEndianInt(inputLength);
+ writeBigEndianInt(compressedSize);
+ out.write(outputBuffer, 0, compressedSize);
+
+ this.inputOffset = 0;
+ }
+
+ private void writeBigEndianInt(int value)
+ throws IOException
+ {
+ out.write(value >>> 24);
+ out.write(value >>> 16);
+ out.write(value >>> 8);
+ out.write(value);
+ }
+
+ private static int compressionOverhead(int size)
+ {
+ return (size / 6) + 32;
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/org/iq80/snappy/IncompatibleJvmException.java b/src/main/java/org/iq80/snappy/IncompatibleJvmException.java
new file mode 100644
index 0000000..e1dc6c8
--- /dev/null
+++ b/src/main/java/org/iq80/snappy/IncompatibleJvmException.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.iq80.snappy;
+
+public class IncompatibleJvmException
+ extends RuntimeException
+{
+ public IncompatibleJvmException(String message)
+ {
+ super(message);
+ }
+}
diff --git a/src/main/java/org/iq80/snappy/Main.java b/src/main/java/org/iq80/snappy/Main.java
deleted file mode 100644
index 9de9ed4..0000000
--- a/src/main/java/org/iq80/snappy/Main.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (C) 2011 the original author or authors.
- * See the notice.md file distributed with this work for additional
- * information regarding copyright ownership.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.iq80.snappy;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-public class Main
-{
- public static void main(String[] args)
- throws Exception
- {
- if ((args.length == 1) && (args[0].equals("-c"))) {
- compress();
- }
- else if ((args.length == 1) && (args[0].equals("-d"))) {
- uncompress();
- }
- else {
- usage();
- }
- }
-
- private static void usage()
- {
- System.err.println("Usage: java -jar snappy.jar OPTION");
- System.err.println("Compress or uncompress with Snappy.");
- System.err.println();
- System.err.println(" -c compress from stdin to stdout");
- System.err.println(" -d uncompress from stdin to stdout");
- System.exit(100);
- }
-
- private static void compress()
- throws IOException
- {
- copy(System.in, new SnappyOutputStream(System.out));
- }
-
- private static void uncompress()
- throws IOException
- {
- copy(new SnappyInputStream(System.in), System.out);
- }
-
- private static void copy(InputStream in, OutputStream out)
- throws IOException
- {
- byte[] buf = new byte[4096];
- while (true) {
- int r = in.read(buf);
- if (r == -1) {
- out.close();
- in.close();
- return;
- }
- out.write(buf, 0, r);
- }
- }
-}
diff --git a/src/main/java/org/iq80/snappy/SlowMemory.java b/src/main/java/org/iq80/snappy/SlowMemory.java
deleted file mode 100644
index f1f1336..0000000
--- a/src/main/java/org/iq80/snappy/SlowMemory.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (C) 2011 the original author or authors.
- * See the notice.md file distributed with this work for additional
- * information regarding copyright ownership.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.iq80.snappy;
-
-class SlowMemory
- implements Memory
-{
- @Override
- public boolean fastAccessSupported()
- {
- return false;
- }
-
- @Override
- public int lookupShort(short[] data, int index)
- {
- return data[index] & 0xFFFF;
- }
-
- @Override
- public int loadByte(byte[] data, int index)
- {
- return data[index] & 0xFF;
- }
-
- @Override
- public int loadInt(byte[] data, int index)
- {
- return (data[index] & 0xff) |
- (data[index + 1] & 0xff) << 8 |
- (data[index + 2] & 0xff) << 16 |
- (data[index + 3] & 0xff) << 24;
- }
-
- @Override
- public void copyLong(byte[] src, int srcIndex, byte[] dest, int destIndex)
- {
- for (int i = 0; i < 8; i++) {
- dest[destIndex + i] = src[srcIndex + i];
- }
- }
-
- @Override
- public long loadLong(byte[] data, int index)
- {
- return (data[index] & 0xffL) |
- (data[index + 1] & 0xffL) << 8 |
- (data[index + 2] & 0xffL) << 16 |
- (data[index + 3] & 0xffL) << 24 |
- (data[index + 4] & 0xffL) << 32 |
- (data[index + 5] & 0xffL) << 40 |
- (data[index + 6] & 0xffL) << 48 |
- (data[index + 7] & 0xffL) << 56;
- }
-
- @Override
- public void copyMemory(byte[] input, int inputIndex, byte[] output, int outputIndex, int length)
- {
- System.arraycopy(input, inputIndex, output, outputIndex, length);
- }
-}
diff --git a/src/main/java/org/iq80/snappy/Snappy.java b/src/main/java/org/iq80/snappy/Snappy.java
index ee071ef..d201f43 100644
--- a/src/main/java/org/iq80/snappy/Snappy.java
+++ b/src/main/java/org/iq80/snappy/Snappy.java
@@ -17,83 +17,60 @@
*/
package org.iq80.snappy;
-import java.io.IOException;
-import java.io.InputStream;
import java.util.Arrays;
-import static org.iq80.snappy.SnappyFramed.HEADER_BYTES;
-import static org.iq80.snappy.SnappyInternalUtils.checkArgument;
-import static org.iq80.snappy.SnappyInternalUtils.checkNotNull;
-import static org.iq80.snappy.SnappyOutputStream.STREAM_HEADER;
+import static java.lang.String.format;
+import static java.util.Objects.requireNonNull;
+import static sun.misc.Unsafe.ARRAY_BYTE_BASE_OFFSET;
public final class Snappy
{
-
- private static final int MAX_HEADER_LENGTH = Math.max(STREAM_HEADER.length, HEADER_BYTES.length);
-
- private Snappy()
- {
- }
-
- /**
- * Uses the stream marker bytes to determine if the {@link SnappyFramedInputStream} or
- * {@link SnappyInputStream} should be used to decompress the content of source.
- *
- * @param source The compressed content to decompress. Must {@link InputStream#markSupported()
- * support} {@link InputStream#mark(int).}
- * @param verifyChecksums Indicates if the crc32-c checksums should be calculated and verified.
- * @return An appropriate {@link InputStream} implementation to decompress the content.
- * @throws IllegalArgumentException If source does not {@link InputStream#markSupported()
- * support} mark/reset or does not contain the appropriate marker bytes for either implementation.
- */
- @SuppressWarnings("deprecation")
- public static InputStream determineSnappyInputStream(InputStream source, boolean verifyChecksums)
- throws IOException
- {
- checkNotNull(source, "source is null");
- checkArgument(source.markSupported(), "source does not support mark/reset");
-
- // read the header and then reset to start of stream
- source.mark(MAX_HEADER_LENGTH);
- byte[] buffer = new byte[MAX_HEADER_LENGTH];
- int read = SnappyInternalUtils.readBytes(source, buffer, 0, MAX_HEADER_LENGTH);
- source.reset();
-
- if (read != STREAM_HEADER.length || read != HEADER_BYTES.length) {
- throw new IllegalArgumentException("invalid header");
- }
-
- if (buffer[0] == HEADER_BYTES[0]) {
- checkArgument(Arrays.equals(Arrays.copyOf(buffer, HEADER_BYTES.length), HEADER_BYTES), "invalid header");
- return new SnappyFramedInputStream(source, verifyChecksums);
- }
- else {
- checkArgument(Arrays.equals(Arrays.copyOf(buffer, STREAM_HEADER.length), STREAM_HEADER), "invalid header");
- return new SnappyInputStream(source, verifyChecksums);
- }
- }
+ private Snappy() {}
public static int getUncompressedLength(byte[] compressed, int compressedOffset)
throws CorruptionException
{
- return SnappyDecompressor.getUncompressedLength(compressed, compressedOffset);
+ long compressedAddress = ARRAY_BYTE_BASE_OFFSET + compressedOffset;
+ long compressedLimit = ARRAY_BYTE_BASE_OFFSET + compressed.length;
+
+ return SnappyRawDecompressor.getUncompressedLength(compressed, compressedAddress, compressedLimit);
}
public static byte[] uncompress(byte[] compressed, int compressedOffset, int compressedSize)
throws CorruptionException
{
- return SnappyDecompressor.uncompress(compressed, compressedOffset, compressedSize);
+ byte[] output = new byte[getUncompressedLength(compressed, compressedOffset)];
+ int uncompressedSize = uncompress(compressed, compressedOffset, compressedSize, output, 0);
+ if (uncompressedSize != output.length) {
+ throw new CorruptionException(0, format("Recorded length is %s bytes but actual length after decompression is %s bytes ",
+ output.length,
+ uncompressedSize));
+ }
+ return output;
}
public static int uncompress(byte[] compressed, int compressedOffset, int compressedSize, byte[] uncompressed, int uncompressedOffset)
throws CorruptionException
{
- return SnappyDecompressor.uncompress(compressed, compressedOffset, compressedSize, uncompressed, uncompressedOffset);
+ return uncompress(compressed, compressedOffset, compressedSize, uncompressed, uncompressedOffset, uncompressed.length - uncompressedOffset);
+ }
+
+ public static int uncompress(byte[] compressed, int compressedOffset, int compressedSize, byte[] uncompressed, int uncompressedOffset, int uncompressedLength)
+ {
+ verifyRange(compressed, compressedOffset, compressedSize);
+ verifyRange(uncompressed, uncompressedOffset, uncompressedLength);
+
+ long inputAddress = ARRAY_BYTE_BASE_OFFSET + compressedOffset;
+ long inputLimit = inputAddress + compressedSize;
+ long outputAddress = ARRAY_BYTE_BASE_OFFSET + uncompressedOffset;
+ long outputLimit = outputAddress + uncompressedLength; // honor the caller's window so nothing past uncompressedOffset + uncompressedLength is written
+
+ return SnappyRawDecompressor.decompress(compressed, inputAddress, inputLimit, uncompressed, outputAddress, outputLimit);
}
public static int maxCompressedLength(int sourceLength)
{
- return SnappyCompressor.maxCompressedLength(sourceLength);
+ return SnappyRawCompressor.maxCompressedLength(sourceLength);
}
public static int compress(
@@ -103,14 +80,18 @@ public static int compress(
byte[] compressed,
int compressedOffset)
{
- return SnappyCompressor.compress(uncompressed,
- uncompressedOffset,
- uncompressedLength,
- compressed,
- compressedOffset);
- }
+ verifyRange(uncompressed, uncompressedOffset, uncompressedLength);
+ verifyRange(compressed, compressedOffset, compressed.length - compressedOffset);
+
+ long inputAddress = ARRAY_BYTE_BASE_OFFSET + uncompressedOffset;
+ long inputLimit = inputAddress + uncompressedLength;
+ long outputAddress = ARRAY_BYTE_BASE_OFFSET + compressedOffset;
+ long outputLimit = outputAddress + compressed.length - compressedOffset;
+ short[] table = new short[SnappyRawCompressor.MAX_HASH_TABLE_SIZE];
+ return SnappyRawCompressor.compress(uncompressed, inputAddress, inputLimit, compressed, outputAddress, outputLimit, table);
+ }
public static byte[] compress(byte[] data)
{
byte[] compressedOut = new byte[maxCompressedLength(data.length)];
@@ -119,8 +100,11 @@ public static byte[] compress(byte[] data)
return trimmedBuffer;
}
- static final int LITERAL = 0;
- static final int COPY_1_BYTE_OFFSET = 1; // 3 bit length + 3 bits of offset in opcode
- static final int COPY_2_BYTE_OFFSET = 2;
- static final int COPY_4_BYTE_OFFSET = 3;
+ private static void verifyRange(byte[] data, int offset, int length)
+ {
+ requireNonNull(data, "data is null");
+ if (offset < 0 || length < 0 || offset > data.length - length) { // overflow-safe form of offset + length > data.length
+ throw new IllegalArgumentException(format("Invalid offset or length (%s, %s) in array of length %s", offset, length, data.length));
+ }
+ }
}
diff --git a/src/main/java/org/iq80/snappy/SnappyCompressor.java b/src/main/java/org/iq80/snappy/SnappyCompressor.java
deleted file mode 100644
index 54ff780..0000000
--- a/src/main/java/org/iq80/snappy/SnappyCompressor.java
+++ /dev/null
@@ -1,519 +0,0 @@
-/*
- * Copyright (C) 2011 the original author or authors.
- * See the notice.md file distributed with this work for additional
- * information regarding copyright ownership.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.iq80.snappy;
-
-import java.nio.ByteOrder;
-import java.util.Arrays;
-
-import static org.iq80.snappy.Snappy.COPY_1_BYTE_OFFSET;
-import static org.iq80.snappy.Snappy.COPY_2_BYTE_OFFSET;
-import static org.iq80.snappy.Snappy.LITERAL;
-
-final class SnappyCompressor
-{
- private static final boolean NATIVE_LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
-
- // *** DO NOT CHANGE THE VALUE OF kBlockSize ***
- //
- // New Compression code chops up the input into blocks of at most
- // the following size. This ensures that back-references in the
- // output never cross kBlockSize block boundaries. This can be
- // helpful in implementing blocked decompression. However the
- // decompression code should not rely on this guarantee since older
- // compression code may not obey it.
- private static final int BLOCK_LOG = 15;
- private static final int BLOCK_SIZE = 1 << BLOCK_LOG;
-
- private static final int INPUT_MARGIN_BYTES = 15;
-
- private static final int MAX_HASH_TABLE_BITS = 14;
- private static final int MAX_HASH_TABLE_SIZE = 1 << MAX_HASH_TABLE_BITS;
-
- public static int maxCompressedLength(int sourceLength)
- {
- // Compressed data can be defined as:
- // compressed := item* literal*
- // item := literal* copy
- //
- // The trailing literal sequence has a space blowup of at most 62/60
- // since a literal of length 60 needs one tag byte + one extra byte
- // for length information.
- //
- // Item blowup is trickier to measure. Suppose the "copy" op copies
- // 4 bytes of data. Because of a special check in the encoding code,
- // we produce a 4-byte copy only if the offset is < 65536. Therefore
- // the copy op takes 3 bytes to encode, and this type of item leads
- // to at most the 62/60 blowup for representing literals.
- //
- // Suppose the "copy" op copies 5 bytes of data. If the offset is big
- // enough, it will take 5 bytes to encode the copy op. Therefore the
- // worst case here is a one-byte literal followed by a five-byte copy.
- // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
- //
- // This last factor dominates the blowup, so the final estimate is:
- return 32 + sourceLength + sourceLength / 6;
- }
-
- public static int compress(
- final byte[] uncompressed,
- final int uncompressedOffset,
- final int uncompressedLength,
- final byte[] compressed,
- final int compressedOffset)
- {
- // First write the uncompressed size to the output as a variable length int
- int compressedIndex = writeUncompressedLength(compressed, compressedOffset, uncompressedLength);
-
- int hashTableSize = getHashTableSize(uncompressedLength);
- BufferRecycler recycler = BufferRecycler.instance();
- short[] table = recycler.allocEncodingHash(hashTableSize);
-
- for (int read = 0; read < uncompressedLength; read += BLOCK_SIZE) {
- // Get encoding table for compression
- Arrays.fill(table, (short) 0);
-
- compressedIndex = compressFragment(
- uncompressed,
- uncompressedOffset + read,
- Math.min(uncompressedLength - read, BLOCK_SIZE),
- compressed,
- compressedIndex,
- table);
- }
-
- recycler.releaseEncodingHash(table);
-
- return compressedIndex - compressedOffset;
- }
-
- private static int compressFragment(
- final byte[] input,
- final int inputOffset,
- final int inputSize,
- final byte[] output,
- int outputIndex,
- final short[] table)
- {
- int ipIndex = inputOffset;
- assert inputSize <= BLOCK_SIZE;
- final int ipEndIndex = inputOffset + inputSize;
-
- int hashTableSize = getHashTableSize(inputSize);
- // todo given that hashTableSize is required to be a power of 2, this is overly complex
- final int shift = 32 - log2Floor(hashTableSize);
- assert (hashTableSize & (hashTableSize - 1)) == 0 : "table must be power of two";
- assert 0xFFFFFFFF >>> shift == hashTableSize - 1;
-
- // Bytes in [nextEmitIndex, ipIndex) will be emitted as literal bytes. Or
- // [nextEmitIndex, ipEndIndex) after the main loop.
- int nextEmitIndex = ipIndex;
-
- if (inputSize >= INPUT_MARGIN_BYTES) {
- final int ipLimit = inputOffset + inputSize - INPUT_MARGIN_BYTES;
- while (ipIndex <= ipLimit) {
- assert nextEmitIndex <= ipIndex;
-
- // The body of this loop calls EmitLiteral once and then EmitCopy one or
- // more times. (The exception is that when we're close to exhausting
- // the input we exit and emit a literal.)
- //
- // In the first iteration of this loop we're just starting, so
- // there's nothing to copy, so calling EmitLiteral once is
- // necessary. And we only start a new iteration when the
- // current iteration has determined that a call to EmitLiteral will
- // precede the next call to EmitCopy (if any).
- //
- // Step 1: Scan forward in the input looking for a 4-byte-long match.
- // If we get close to exhausting the input exit and emit a final literal.
- //
- // Heuristic match skipping: If 32 bytes are scanned with no matches
- // found, start looking only at every other byte. If 32 more bytes are
- // scanned, look at every third byte, etc.. When a match is found,
- // immediately go back to looking at every byte. This is a small loss
- // (~5% performance, ~0.1% density) for compressible data due to more
- // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
- // win since the compressor quickly "realizes" the data is incompressible
- // and doesn't bother looking for matches everywhere.
- //
- // The "skip" variable keeps track of how many bytes there are since the
- // last match; dividing it by 32 (ie. right-shifting by five) gives the
- // number of bytes to move ahead for each iteration.
- int skip = 32;
-
- int[] candidateResult = findCandidate(input, ipIndex, ipLimit, inputOffset, shift, table, skip);
- ipIndex = candidateResult[0];
- int candidateIndex = candidateResult[1];
- skip = candidateResult[2];
- if (ipIndex + bytesBetweenHashLookups(skip) > ipLimit) {
- break;
- }
-
- // Step 2: A 4-byte match has been found. We'll later see if more
- // than 4 bytes match. But, prior to the match, input
- // bytes [nextEmit, ip) are unmatched. Emit them as "literal bytes."
- assert nextEmitIndex + 16 <= ipEndIndex;
- outputIndex = emitLiteral(output, outputIndex, input, nextEmitIndex, ipIndex - nextEmitIndex, true);
-
- // Step 3: Call EmitCopy, and then see if another EmitCopy could
- // be our next move. Repeat until we find no match for the
- // input immediately after what was consumed by the last EmitCopy call.
- //
- // If we exit this loop normally then we need to call EmitLiteral next,
- // though we don't yet know how big the literal will be. We handle that
- // by proceeding to the next iteration of the main loop. We also can exit
- // this loop via goto if we get close to exhausting the input.
- int[] indexes = emitCopies(input, inputOffset, inputSize, ipIndex, output, outputIndex, table, shift, candidateIndex);
- ipIndex = indexes[0];
- outputIndex = indexes[1];
- nextEmitIndex = ipIndex;
- }
- }
-
- // goto emitRemainder hack
- if (nextEmitIndex < ipEndIndex) {
- // Emit the remaining bytes as a literal
- outputIndex = emitLiteral(output, outputIndex, input, nextEmitIndex, ipEndIndex - nextEmitIndex, false);
- }
- return outputIndex;
- }
-
- private static int[] findCandidate(byte[] input, int ipIndex, int ipLimit, int inputOffset, int shift, short[] table, int skip)
- {
-
- int candidateIndex = 0;
- for (ipIndex += 1; ipIndex + bytesBetweenHashLookups(skip) <= ipLimit; ipIndex += bytesBetweenHashLookups(skip++)) {
- // hash the 4 bytes starting at the input pointer
- int currentInt = SnappyInternalUtils.loadInt(input, ipIndex);
- int hash = hashBytes(currentInt, shift);
-
- // get the position of a 4 bytes sequence with the same hash
- candidateIndex = inputOffset + table[hash];
- assert candidateIndex >= 0;
- assert candidateIndex < ipIndex;
-
- // update the hash to point to the current position
- table[hash] = (short) (ipIndex - inputOffset);
-
- // if the 4 byte sequence a the candidate index matches the sequence at the
- // current position, proceed to the next phase
- if (currentInt == SnappyInternalUtils.loadInt(input, candidateIndex)) {
- break;
- }
- }
- return new int[] {ipIndex, candidateIndex, skip};
- }
-
- private static int bytesBetweenHashLookups(int skip)
- {
- return (skip >>> 5);
- }
-
- private static int[] emitCopies(
- byte[] input,
- final int inputOffset,
- final int inputSize,
- int ipIndex,
- byte[] output,
- int outputIndex,
- short[] table,
- int shift,
- int candidateIndex)
- {
- // Step 3: Call EmitCopy, and then see if another EmitCopy could
- // be our next move. Repeat until we find no match for the
- // input immediately after what was consumed by the last EmitCopy call.
- //
- // If we exit this loop normally then we need to call EmitLiteral next,
- // though we don't yet know how big the literal will be. We handle that
- // by proceeding to the next iteration of the main loop. We also can exit
- // this loop via goto if we get close to exhausting the input.
- int inputBytes;
- do {
- // We have a 4-byte match at ip, and no need to emit any
- // "literal bytes" prior to ip.
- int matched = 4 + findMatchLength(input, candidateIndex + 4, input, ipIndex + 4, inputOffset + inputSize);
- int offset = ipIndex - candidateIndex;
- assert SnappyInternalUtils.equals(input, ipIndex, input, candidateIndex, matched);
- ipIndex += matched;
-
- // emit the copy operation for this chunk
- outputIndex = emitCopy(output, outputIndex, offset, matched);
-
- // are we done?
- if (ipIndex >= inputOffset + inputSize - INPUT_MARGIN_BYTES) {
- return new int[] {ipIndex, outputIndex};
- }
-
- // We could immediately start working at ip now, but to improve
- // compression we first update table[Hash(ip - 1, ...)].
- int prevInt;
- if (SnappyInternalUtils.HAS_UNSAFE) {
- long foo = SnappyInternalUtils.loadLong(input, ipIndex - 1);
- prevInt = (int) foo;
- inputBytes = (int) (foo >>> 8);
- }
- else {
- prevInt = SnappyInternalUtils.loadInt(input, ipIndex - 1);
- inputBytes = SnappyInternalUtils.loadInt(input, ipIndex);
- }
-
- // add hash starting with previous byte
- int prevHash = hashBytes(prevInt, shift);
- table[prevHash] = (short) (ipIndex - inputOffset - 1);
-
- // update hash of current byte
- int curHash = hashBytes(inputBytes, shift);
-
- candidateIndex = inputOffset + table[curHash];
- table[curHash] = (short) (ipIndex - inputOffset);
-
- } while (inputBytes == SnappyInternalUtils.loadInt(input, candidateIndex));
- return new int[] {ipIndex, outputIndex};
- }
-
- private static int emitLiteral(
- byte[] output,
- int outputIndex,
- byte[] literal,
- final int literalIndex,
- final int length,
- final boolean allowFastPath)
- {
- SnappyInternalUtils.checkPositionIndexes(literalIndex, literalIndex + length, literal.length);
-
- int n = length - 1; // Zero-length literals are disallowed
- if (n < 60) {
- // Size fits in tag byte
- output[outputIndex++] = (byte) (LITERAL | n << 2);
-
- // The vast majority of copies are below 16 bytes, for which a
- // call to memcpy is overkill. This fast path can sometimes
- // copy up to 15 bytes too much, but that is okay in the
- // main loop, since we have a bit to go on for both sides:
- //
- // - The input will always have kInputMarginBytes = 15 extra
- // available bytes, as long as we're in the main loop, and
- // if not, allowFastPath = false.
- // - The output will always have 32 spare bytes (see
- // MaxCompressedLength).
- if (allowFastPath && length <= 16) {
- SnappyInternalUtils.copyLong(literal, literalIndex, output, outputIndex);
- SnappyInternalUtils.copyLong(literal, literalIndex + 8, output, outputIndex + 8);
- outputIndex += length;
- return outputIndex;
- }
- }
- else if (n < (1 << 8)) {
- output[outputIndex++] = (byte) (LITERAL | 59 + 1 << 2);
- output[outputIndex++] = (byte) (n);
- }
- else if (n < (1 << 16)) {
- output[outputIndex++] = (byte) (LITERAL | 59 + 2 << 2);
- output[outputIndex++] = (byte) (n);
- output[outputIndex++] = (byte) (n >>> 8);
- }
- else if (n < (1 << 24)) {
- output[outputIndex++] = (byte) (LITERAL | 59 + 3 << 2);
- output[outputIndex++] = (byte) (n);
- output[outputIndex++] = (byte) (n >>> 8);
- output[outputIndex++] = (byte) (n >>> 16);
- }
- else {
- output[outputIndex++] = (byte) (LITERAL | 59 + 4 << 2);
- output[outputIndex++] = (byte) (n);
- output[outputIndex++] = (byte) (n >>> 8);
- output[outputIndex++] = (byte) (n >>> 16);
- output[outputIndex++] = (byte) (n >>> 24);
- }
-
- SnappyInternalUtils.checkPositionIndexes(literalIndex, literalIndex + length, literal.length);
-
- System.arraycopy(literal, literalIndex, output, outputIndex, length);
- outputIndex += length;
- return outputIndex;
- }
-
- private static int emitCopyLessThan64(
- byte[] output,
- int outputIndex,
- int offset,
- int length)
- {
- assert offset >= 0;
- assert length <= 64;
- assert length >= 4;
- assert offset < 65536;
-
- if ((length < 12) && (offset < 2048)) {
- int lenMinus4 = length - 4;
- assert (lenMinus4 < 8); // Must fit in 3 bits
- output[outputIndex++] = (byte) (COPY_1_BYTE_OFFSET | ((lenMinus4) << 2) | ((offset >>> 8) << 5));
- output[outputIndex++] = (byte) (offset);
- }
- else {
- output[outputIndex++] = (byte) (COPY_2_BYTE_OFFSET | ((length - 1) << 2));
- output[outputIndex++] = (byte) (offset);
- output[outputIndex++] = (byte) (offset >>> 8);
- }
- return outputIndex;
- }
-
- private static int emitCopy(
- byte[] output,
- int outputIndex,
- int offset,
- int length)
- {
- // Emit 64 byte copies but make sure to keep at least four bytes reserved
- while (length >= 68) {
- outputIndex = emitCopyLessThan64(output, outputIndex, offset, 64);
- length -= 64;
- }
-
- // Emit an extra 60 byte copy if have too much data to fit in one copy
- if (length > 64) {
- outputIndex = emitCopyLessThan64(output, outputIndex, offset, 60);
- length -= 60;
- }
-
- // Emit remainder
- outputIndex = emitCopyLessThan64(output, outputIndex, offset, length);
- return outputIndex;
- }
-
- private static int findMatchLength(
- byte[] s1,
- int s1Index,
- byte[] s2,
- final int s2Index,
- int s2Limit)
- {
- assert (s2Limit >= s2Index);
-
- if (SnappyInternalUtils.HAS_UNSAFE) {
- int matched = 0;
-
- while (s2Index + matched <= s2Limit - 4 && SnappyInternalUtils.loadInt(s2, s2Index + matched) == SnappyInternalUtils.loadInt(s1, s1Index + matched)) {
- matched += 4;
- }
-
- if (NATIVE_LITTLE_ENDIAN && s2Index + matched <= s2Limit - 4) {
- int x = SnappyInternalUtils.loadInt(s2, s2Index + matched) ^ SnappyInternalUtils.loadInt(s1, s1Index + matched);
- int matchingBits = Integer.numberOfTrailingZeros(x);
- matched += matchingBits >> 3;
- }
- else {
- while (s2Index + matched < s2Limit && s1[s1Index + matched] == s2[s2Index + matched]) {
- ++matched;
- }
- }
- return matched;
- }
- else {
- int length = s2Limit - s2Index;
- for (int matched = 0; matched < length; matched++) {
- if (s1[s1Index + matched] != s2[s2Index + matched]) {
- return matched;
- }
- }
- return length;
- }
- }
-
- private static int getHashTableSize(int inputSize)
- {
- // Use smaller hash table when input.size() is smaller, since we
- // fill the table, incurring O(hash table size) overhead for
- // compression, and if the input is short, we won't need that
- // many hash table entries anyway.
- assert (MAX_HASH_TABLE_SIZE >= 256);
-
- int hashTableSize = 256;
- while (hashTableSize < MAX_HASH_TABLE_SIZE && hashTableSize < inputSize) {
- hashTableSize <<= 1;
- }
- assert 0 == (hashTableSize & (hashTableSize - 1)) : "hash must be power of two";
- assert hashTableSize <= MAX_HASH_TABLE_SIZE : "hash table too large";
- return hashTableSize;
-
-// // todo should be faster but is not
-// int newHashTableSize;
-// if (inputSize < 256) {
-// newHashTableSize = 256;
-// } else if (inputSize > kMaxHashTableSize) {
-// newHashTableSize = kMaxHashTableSize;
-// } else {
-// int leadingZeros = Integer.numberOfLeadingZeros(inputSize - 1);
-// newHashTableSize = 1 << (32 - leadingZeros);
-// }
-//
-// assert 0 == (newHashTableSize & (newHashTableSize - 1)) : "hash must be power of two";
-// assert newHashTableSize <= kMaxHashTableSize : "hash table too large";
-// return newHashTableSize;
- }
-
- // Any hash function will produce a valid compressed bitstream, but a good
- // hash function reduces the number of collisions and thus yields better
- // compression for compressible input, and more speed for incompressible
- // input. Of course, it doesn't hurt if the hash function is reasonably fast
- // either, as it gets called a lot.
- private static int hashBytes(int bytes, int shift)
- {
- int kMul = 0x1e35a7bd;
- return (bytes * kMul) >>> shift;
- }
-
- private static int log2Floor(int n)
- {
- return n == 0 ? -1 : 31 ^ Integer.numberOfLeadingZeros(n);
- }
-
- /**
- * Writes the uncompressed length as variable length integer.
- */
- private static int writeUncompressedLength(byte[] compressed, int compressedOffset, int uncompressedLength)
- {
- int highBitMask = 0x80;
- if (uncompressedLength < (1 << 7) && uncompressedLength >= 0) {
- compressed[compressedOffset++] = (byte) (uncompressedLength);
- }
- else if (uncompressedLength < (1 << 14) && uncompressedLength > 0) {
- compressed[compressedOffset++] = (byte) (uncompressedLength | highBitMask);
- compressed[compressedOffset++] = (byte) (uncompressedLength >>> 7);
- }
- else if (uncompressedLength < (1 << 21) && uncompressedLength > 0) {
- compressed[compressedOffset++] = (byte) (uncompressedLength | highBitMask);
- compressed[compressedOffset++] = (byte) ((uncompressedLength >>> 7) | highBitMask);
- compressed[compressedOffset++] = (byte) (uncompressedLength >>> 14);
- }
- else if (uncompressedLength < (1 << 28) && uncompressedLength > 0) {
- compressed[compressedOffset++] = (byte) (uncompressedLength | highBitMask);
- compressed[compressedOffset++] = (byte) ((uncompressedLength >>> 7) | highBitMask);
- compressed[compressedOffset++] = (byte) ((uncompressedLength >>> 14) | highBitMask);
- compressed[compressedOffset++] = (byte) (uncompressedLength >>> 21);
- }
- else {
- compressed[compressedOffset++] = (byte) (uncompressedLength | highBitMask);
- compressed[compressedOffset++] = (byte) ((uncompressedLength >>> 7) | highBitMask);
- compressed[compressedOffset++] = (byte) ((uncompressedLength >>> 14) | highBitMask);
- compressed[compressedOffset++] = (byte) ((uncompressedLength >>> 21) | highBitMask);
- compressed[compressedOffset++] = (byte) (uncompressedLength >>> 28);
- }
- return compressedOffset;
- }
-}
diff --git a/src/main/java/org/iq80/snappy/Memory.java b/src/main/java/org/iq80/snappy/SnappyConstants.java
similarity index 51%
rename from src/main/java/org/iq80/snappy/Memory.java
rename to src/main/java/org/iq80/snappy/SnappyConstants.java
index 53972d1..838322c 100644
--- a/src/main/java/org/iq80/snappy/Memory.java
+++ b/src/main/java/org/iq80/snappy/SnappyConstants.java
@@ -1,8 +1,4 @@
/*
- * Copyright (C) 2011 the original author or authors.
- * See the notice.md file distributed with this work for additional
- * information regarding copyright ownership.
- *
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -17,19 +13,15 @@
*/
package org.iq80.snappy;
-interface Memory
+final class SnappyConstants
{
- boolean fastAccessSupported();
-
- int lookupShort(short[] data, int index);
-
- int loadByte(byte[] data, int index);
-
- int loadInt(byte[] data, int index);
-
- void copyLong(byte[] src, int srcIndex, byte[] dest, int destIndex);
+ static final int SIZE_OF_SHORT = 2;
+ static final int SIZE_OF_INT = 4;
+ static final int SIZE_OF_LONG = 8;
- long loadLong(byte[] data, int index);
+ static final int LITERAL = 0;
+ static final int COPY_1_BYTE_OFFSET = 1; // 3 bit length + 3 bits of offset in opcode
+ static final int COPY_2_BYTE_OFFSET = 2;
- void copyMemory(byte[] input, int inputIndex, byte[] output, int outputIndex, int length);
+ private SnappyConstants() {}
}
diff --git a/src/main/java/org/iq80/snappy/SnappyDecompressor.java b/src/main/java/org/iq80/snappy/SnappyDecompressor.java
deleted file mode 100644
index 0b1b01b..0000000
--- a/src/main/java/org/iq80/snappy/SnappyDecompressor.java
+++ /dev/null
@@ -1,434 +0,0 @@
-/*
- * Copyright (C) 2011 the original author or authors.
- * See the notice.md file distributed with this work for additional
- * information regarding copyright ownership.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.iq80.snappy;
-
-import static org.iq80.snappy.SnappyInternalUtils.copyLong;
-import static org.iq80.snappy.SnappyInternalUtils.loadByte;
-import static org.iq80.snappy.SnappyInternalUtils.lookupShort;
-
-final class SnappyDecompressor
-{
- private static final int MAX_INCREMENT_COPY_OVERFLOW = 20;
-
- public static int getUncompressedLength(byte[] compressed, int compressedOffset)
- throws CorruptionException
- {
- return readUncompressedLength(compressed, compressedOffset)[0];
- }
-
- public static byte[] uncompress(byte[] compressed, int compressedOffset, int compressedSize)
- throws CorruptionException
- {
- // Read the uncompressed length from the front of the compressed input
- int[] varInt = readUncompressedLength(compressed, compressedOffset);
- int expectedLength = varInt[0];
- compressedOffset += varInt[1];
- compressedSize -= varInt[1];
-
- // allocate the uncompressed buffer
- byte[] uncompressed = new byte[expectedLength];
-
- // Process the entire input
- int uncompressedSize = decompressAllTags(
- compressed,
- compressedOffset,
- compressedSize,
- uncompressed,
- 0);
-
- if (!(expectedLength == uncompressedSize)) {
- throw new CorruptionException(String.format("Recorded length is %s bytes but actual length after decompression is %s bytes ",
- expectedLength,
- uncompressedSize));
- }
-
- return uncompressed;
- }
-
- public static int uncompress(byte[] compressed, int compressedOffset, int compressedSize, byte[] uncompressed, int uncompressedOffset)
- throws CorruptionException
- {
- // Read the uncompressed length from the front of the compressed input
- int[] varInt = readUncompressedLength(compressed, compressedOffset);
- int expectedLength = varInt[0];
- compressedOffset += varInt[1];
- compressedSize -= varInt[1];
-
- SnappyInternalUtils.checkArgument(expectedLength <= uncompressed.length - uncompressedOffset,
- "Uncompressed length %s must be less than %s", expectedLength, uncompressed.length - uncompressedOffset);
-
- // Process the entire input
- int uncompressedSize = decompressAllTags(
- compressed,
- compressedOffset,
- compressedSize,
- uncompressed,
- uncompressedOffset);
-
- if (!(expectedLength == uncompressedSize)) {
- throw new CorruptionException(String.format("Recorded length is %s bytes but actual length after decompression is %s bytes ",
- expectedLength,
- uncompressedSize));
- }
-
- return expectedLength;
- }
-
- private static int decompressAllTags(
- final byte[] input,
- final int inputOffset,
- final int inputSize,
- final byte[] output,
- final int outputOffset)
- throws CorruptionException
- {
- final int outputLimit = output.length;
-
- final int ipLimit = inputOffset + inputSize;
- int opIndex = outputOffset;
- int ipIndex = inputOffset;
-
- while (ipIndex < ipLimit - 5) {
- int opCode = loadByte(input, ipIndex++);
- int entry = lookupShort(opLookupTable, opCode);
- int trailerBytes = entry >>> 11;
- int trailer = readTrailer(input, ipIndex, trailerBytes);
-
- // advance the ipIndex past the op codes
- ipIndex += entry >>> 11;
- int length = entry & 0xff;
-
- if ((opCode & 0x3) == Snappy.LITERAL) {
- int literalLength = length + trailer;
- copyLiteral(input, ipIndex, output, opIndex, literalLength);
- ipIndex += literalLength;
- opIndex += literalLength;
- }
- else {
- // copyOffset/256 is encoded in bits 8..10. By just fetching
- // those bits, we get copyOffset (since the bit-field starts at
- // bit 8).
- int copyOffset = entry & 0x700;
- copyOffset += trailer;
-
- // inline to force hot-spot to keep inline
- //
- // Equivalent to incrementalCopy (below) except that it can write up to ten extra
- // bytes after the end of the copy, and that it is faster.
- //
- // The main part of this loop is a simple copy of eight bytes at a time until
- // we've copied (at least) the requested amount of bytes. However, if op and
- // src are less than eight bytes apart (indicating a repeating pattern of
- // length < 8), we first need to expand the pattern in order to get the correct
- // results. For instance, if the buffer looks like this, with the eight-byte
- // and patterns marked as intervals:
- //
- // abxxxxxxxxxxxx
- // [------] src
- // [------] op
- //
- // a single eight-byte copy from to will repeat the pattern once,
- // after which we can move two bytes without moving :
- //
- // ababxxxxxxxxxx
- // [------] src
- // [------] op
- //
- // and repeat the exercise until the two no longer overlap.
- //
- // This allows us to do very well in the special case of one single byte
- // repeated many times, without taking a big hit for more general cases.
- //
- // The worst case of extra writing past the end of the match occurs when
- // op - src == 1 and len == 1; the last copy will read from byte positions
- // [0..7] and write to [4..11], whereas it was only supposed to write to
- // position 1. Thus, ten excess bytes.
- {
- int spaceLeft = outputLimit - opIndex;
- int srcIndex = opIndex - copyOffset;
- if (srcIndex < outputOffset) {
- throw new CorruptionException("Invalid copy offset for opcode starting at " + (ipIndex - trailerBytes - 1));
- }
-
- if (length <= 16 && copyOffset >= 8 && spaceLeft >= 16) {
- // Fast path, used for the majority (70-80%) of dynamic invocations.
- copyLong(output, srcIndex, output, opIndex);
- copyLong(output, srcIndex + 8, output, opIndex + 8);
- }
- else if (spaceLeft >= length + MAX_INCREMENT_COPY_OVERFLOW) {
- incrementalCopyFastPath(output, srcIndex, opIndex, length);
- }
- else {
- incrementalCopy(output, srcIndex, output, opIndex, length);
- }
- }
- opIndex += length;
- }
- }
-
-
- for (; ipIndex < ipLimit; ) {
- int[] result = decompressTagSlow(input, ipIndex, output, outputLimit, outputOffset, opIndex);
- ipIndex = result[0];
- opIndex = result[1];
- }
-
- return opIndex - outputOffset;
- }
-
- /**
- * This is a second copy of the inner loop of decompressTags used when near the end
- * of the input. The key difference is the reading of the trailer bytes. The fast
- * code does a blind read of the next 4 bytes as an int, and this code assembles
- * the int byte-by-byte to assure that the array is not over run. The reason this
- * code path is separate is the if condition to choose between these two seemingly
- * small differences costs like 10-20% of the throughput. I'm hoping in future
- * versions of hot-spot this code can be integrated into the main loop but for now
- * it is worth the extra maintenance pain to get the extra 10-20%.
- */
- private static int[] decompressTagSlow(byte[] input, int ipIndex, byte[] output, int outputLimit, int outputOffset, int opIndex)
- throws CorruptionException
- {
- // read the op code
- int opCode = loadByte(input, ipIndex++);
- int entry = lookupShort(opLookupTable, opCode);
- int trailerBytes = entry >>> 11;
- //
- // Key difference here
- //
- int trailer = 0;
- switch (trailerBytes) {
- case 4:
- trailer = (input[ipIndex + 3] & 0xff) << 24;
- case 3:
- trailer |= (input[ipIndex + 2] & 0xff) << 16;
- case 2:
- trailer |= (input[ipIndex + 1] & 0xff) << 8;
- case 1:
- trailer |= (input[ipIndex] & 0xff);
- }
-
- // advance the ipIndex past the op codes
- ipIndex += trailerBytes;
- int length = entry & 0xff;
-
- if ((opCode & 0x3) == Snappy.LITERAL) {
- int literalLength = length + trailer;
- copyLiteral(input, ipIndex, output, opIndex, literalLength);
- ipIndex += literalLength;
- opIndex += literalLength;
- }
- else {
- // copyOffset/256 is encoded in bits 8..10. By just fetching
- // those bits, we get copyOffset (since the bit-field starts at
- // bit 8).
- int copyOffset = entry & 0x700;
- copyOffset += trailer;
-
- // inline to force hot-spot to keep inline
- {
- int spaceLeft = outputLimit - opIndex;
- int srcIndex = opIndex - copyOffset;
-
- if (srcIndex < outputOffset) {
- throw new CorruptionException("Invalid copy offset for opcode starting at " + (ipIndex - trailerBytes - 1));
- }
-
- if (length <= 16 && copyOffset >= 8 && spaceLeft >= 16) {
- // Fast path, used for the majority (70-80%) of dynamic invocations.
- copyLong(output, srcIndex, output, opIndex);
- copyLong(output, srcIndex + 8, output, opIndex + 8);
- }
- else if (spaceLeft >= length + MAX_INCREMENT_COPY_OVERFLOW) {
- incrementalCopyFastPath(output, srcIndex, opIndex, length);
- }
- else {
- incrementalCopy(output, srcIndex, output, opIndex, length);
- }
- }
- opIndex += length;
- }
- return new int[] {ipIndex, opIndex};
- }
-
- private static int readTrailer(byte[] data, int index, int bytes)
- {
- return SnappyInternalUtils.loadInt(data, index) & wordmask[bytes];
- }
-
- private static void copyLiteral(byte[] input, int ipIndex, byte[] output, int opIndex, int length)
- throws CorruptionException
- {
- assert length > 0;
- assert ipIndex >= 0;
- assert opIndex >= 0;
-
- int spaceLeft = output.length - opIndex;
- int readableBytes = input.length - ipIndex;
-
- if (readableBytes < length || spaceLeft < length) {
- throw new CorruptionException("Corrupt literal length");
- }
-
- if (length <= 16 && spaceLeft >= 16 && readableBytes >= 16) {
- copyLong(input, ipIndex, output, opIndex);
- copyLong(input, ipIndex + 8, output, opIndex + 8);
- }
- else {
- int fastLength = length & 0xFFFFFFF8;
- if (fastLength <= 64) {
- // copy long-by-long
- for (int i = 0; i < fastLength; i += 8) {
- copyLong(input, ipIndex + i, output, opIndex + i);
- }
-
- // copy byte-by-byte
- int slowLength = length & 0x7;
- // NOTE: This is not a manual array copy. We are copying an overlapping region
- // and we want input data to repeat as it is recopied. see incrementalCopy below.
- //noinspection ManualArrayCopy
- for (int i = 0; i < slowLength; i += 1) {
- output[opIndex + fastLength + i] = input[ipIndex + fastLength + i];
- }
- }
- else {
- SnappyInternalUtils.copyMemory(input, ipIndex, output, opIndex, length);
- }
- }
- }
-
- /**
- * Copy "len" bytes from "src" to "op", one byte at a time. Used for
- * handling COPY operations where the input and output regions may
- * overlap. For example, suppose:
- * src == "ab"
- * op == src + 2
- * len == 20
- *
- * After incrementalCopy, the result will have
- * eleven copies of "ab"
- * ababababababababababab
- * Note that this does not match the semantics of either memcpy()
- * or memmove().
- */
- private static void incrementalCopy(byte[] src, int srcIndex, byte[] op, int opIndex, int length)
- {
- do {
- op[opIndex++] = src[srcIndex++];
- } while (--length > 0);
- }
-
- private static void incrementalCopyFastPath(byte[] output, int srcIndex, int opIndex, int length)
- {
- int copiedLength = 0;
- while ((opIndex + copiedLength) - srcIndex < 8) {
- copyLong(output, srcIndex, output, opIndex + copiedLength);
- copiedLength += (opIndex + copiedLength) - srcIndex;
- }
-
- for (int i = 0; i < length - copiedLength; i += 8) {
- copyLong(output, srcIndex + i, output, opIndex + copiedLength + i);
- }
- }
-
- // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
- private static final int[] wordmask = new int[] {
- 0, 0xff, 0xffff, 0xffffff, 0xffffffff
- };
-
- // Data stored per entry in lookup table:
- // Range Bits-used Description
- // ------------------------------------
- // 1..64 0..7 Literal/copy length encoded in opcode byte
- // 0..7 8..10 Copy offset encoded in opcode byte / 256
- // 0..4 11..13 Extra bytes after opcode
- //
- // We use eight bits for the length even though 7 would have sufficed
- // because of efficiency reasons:
- // (1) Extracting a byte is faster than a bit-field
- // (2) It properly aligns copy offset so we do not need a <<8
- private static final short[] opLookupTable = new short[] {
- 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
- 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
- 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
- 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
- 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
- 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
- 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
- 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
- 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
- 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
- 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
- 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
- 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
- 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
- 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
- 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
- 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
- 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
- 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
- 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
- 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
- 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
- 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
- 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
- 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
- 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
- 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
- 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
- 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
- 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
- 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
- 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
- };
-
- /**
- * Reads the variable length integer encoded a the specified offset, and
- * returns this length with the number of bytes read.
- */
- private static int[] readUncompressedLength(byte[] compressed, int compressedOffset)
- throws CorruptionException
- {
- int result;
- int bytesRead = 0;
- {
- int b = compressed[compressedOffset + bytesRead++] & 0xFF;
- result = b & 0x7f;
- if ((b & 0x80) != 0) {
- b = compressed[compressedOffset + bytesRead++] & 0xFF;
- result |= (b & 0x7f) << 7;
- if ((b & 0x80) != 0) {
- b = compressed[compressedOffset + bytesRead++] & 0xFF;
- result |= (b & 0x7f) << 14;
- if ((b & 0x80) != 0) {
- b = compressed[compressedOffset + bytesRead++] & 0xFF;
- result |= (b & 0x7f) << 21;
- if ((b & 0x80) != 0) {
- b = compressed[compressedOffset + bytesRead++] & 0xFF;
- result |= (b & 0x7f) << 28;
- if ((b & 0x80) != 0) {
- throw new CorruptionException("last byte of compressed length int has high bit set");
- }
- }
- }
- }
- }
- }
- return new int[] {result, bytesRead};
- }
-}
diff --git a/src/main/java/org/iq80/snappy/SnappyFramedInputStream.java b/src/main/java/org/iq80/snappy/SnappyFramedInputStream.java
index a3de8c5..467b21c 100644
--- a/src/main/java/org/iq80/snappy/SnappyFramedInputStream.java
+++ b/src/main/java/org/iq80/snappy/SnappyFramedInputStream.java
@@ -17,29 +17,221 @@
*/
package org.iq80.snappy;
+
+import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Arrays;
-import static org.iq80.snappy.SnappyFramed.COMPRESSED_DATA_FLAG;
-import static org.iq80.snappy.SnappyFramed.HEADER_BYTES;
-import static org.iq80.snappy.SnappyFramed.STREAM_IDENTIFIER_FLAG;
-import static org.iq80.snappy.SnappyFramed.UNCOMPRESSED_DATA_FLAG;
import static org.iq80.snappy.SnappyFramedOutputStream.MAX_BLOCK_SIZE;
+import static java.lang.Math.min;
/**
* Implements the x-snappy-framed as an {@link InputStream}.
*/
-public class SnappyFramedInputStream
- extends AbstractSnappyInputStream
+public final class SnappyFramedInputStream
+ extends InputStream
{
+ private final InputStream in;
+ private final byte[] frameHeader;
+ private final boolean verifyChecksums;
+
+ /**
+ * A single frame read from the underlying {@link InputStream}.
+ */
+ private byte[] input = new byte[0];
+ /**
+ * The decompressed data from {@link #input}.
+ */
+ private byte[] uncompressed = new byte[0];
+ /**
+ * Indicates if this instance has been closed.
+ */
+ private boolean closed;
+ /**
+ * Indicates if we have reached the EOF on {@link #in}.
+ */
+ private boolean eof;
+ /**
+ * The position in {@link #buffer} to read up to (exclusive).
+ */
+ private int valid;
+ /**
+ * The next position to read from {@link #buffer}.
+ */
+ private int position;
+ /**
+ * Buffer is a reference to the real buffer of uncompressed data for the
+ * current block: uncompressed if the block is compressed, or input if it is
+ * not.
+ */
+ private byte[] buffer;
+
+ public SnappyFramedInputStream(InputStream in)
+ throws IOException
+ {
+ this(in, true);
+ }
+
public SnappyFramedInputStream(InputStream in, boolean verifyChecksums)
throws IOException
{
- super(in, MAX_BLOCK_SIZE, 4, verifyChecksums, HEADER_BYTES);
+ this.in = in;
+ this.verifyChecksums = verifyChecksums;
+ allocateBuffersBasedOnSize(MAX_BLOCK_SIZE + 5);
+ this.frameHeader = new byte[4];
+
+ // stream must begin with stream header
+ byte[] actualHeader = new byte[SnappyFramed.HEADER_BYTES.length];
+
+ int read = SnappyInternalUtils.readBytes(in, actualHeader, 0, actualHeader.length);
+ if (read < SnappyFramed.HEADER_BYTES.length) {
+ throw new EOFException("encountered EOF while reading stream header");
+ }
+ if (!Arrays.equals(SnappyFramed.HEADER_BYTES, actualHeader)) {
+ throw new IOException("invalid stream header");
+ }
+ }
+
+ @Override
+ public int read()
+ throws IOException
+ {
+ // a closed stream and an exhausted stream both report end-of-stream
+ if (closed || !ensureBuffer()) {
+ return -1;
+ }
+ // mask so the byte comes back as an unsigned value
+ int next = buffer[position++] & 0xFF;
+ return next;
+ }
+
+ @Override
+ public int read(byte[] output, int offset, int length)
+ throws IOException
+ {
+ SnappyInternalUtils.checkNotNull(output, "output is null");
+ SnappyInternalUtils.checkPositionIndexes(offset, offset + length, output.length);
+ if (closed) {
+ throw new IOException("Stream is closed");
+ }
+
+ if (length == 0) {
+ return 0;
+ }
+ if (!ensureBuffer()) {
+ return -1;
+ }
+
+ int size = min(length, available());
+ System.arraycopy(buffer, position, output, offset, size);
+ position += size;
+ return size;
+ }
+
+ @Override
+ public int available()
+ throws IOException
+ {
+ if (closed) {
+ return 0;
+ }
+ return valid - position;
}
@Override
- protected FrameMetaData getFrameMetaData(byte[] frameHeader)
+ public void close()
+ throws IOException
+ {
+ try {
+ in.close();
+ }
+ finally {
+ if (!closed) {
+ closed = true;
+ }
+ }
+ }
+
+ /**
+ * Ensures {@link #buffer} holds unread bytes, reading the next data frame from {@link #in} if needed.
+ * Skippable frames are consumed in a loop, not recursively, so a long run of them cannot overflow the stack.
+ * @return {@code true} if a data frame was loaded or unread bytes remain, {@code false} once {@link #in} is exhausted
+ */
+ private boolean ensureBuffer()
+ throws IOException
+ {
+ if (available() > 0) {
+ return true;
+ }
+ if (eof) {
+ return false;
+ }
+
+ FrameMetaData frameMetaData;
+ while (true) {
+ if (!readBlockHeader()) {
+ eof = true;
+ return false;
+ }
+ // get action based on header
+ frameMetaData = getFrameMetaData(frameHeader);
+ if (FrameAction.SKIP != frameMetaData.frameAction) {
+ break;
+ }
+ SnappyInternalUtils.skip(in, frameMetaData.length);
+ }
+
+ if (frameMetaData.length > input.length) {
+ allocateBuffersBasedOnSize(frameMetaData.length);
+ }
+ int actualRead = SnappyInternalUtils.readBytes(in, input, 0, frameMetaData.length);
+ if (actualRead != frameMetaData.length) {
+ throw new EOFException("unexpected EOF when reading frame");
+ }
+ FrameData frameData = getFrameData(input);
+ if (FrameAction.UNCOMPRESS == frameMetaData.frameAction) {
+ int uncompressedLength = Snappy.getUncompressedLength(input, frameData.offset);
+ if (uncompressedLength > uncompressed.length) {
+ uncompressed = new byte[uncompressedLength];
+ }
+ this.valid = Snappy.uncompress(input, frameData.offset, actualRead - frameData.offset, uncompressed, 0);
+ this.buffer = uncompressed;
+ this.position = 0;
+ }
+ else {
+ // we need to start reading at the offset
+ this.position = frameData.offset;
+ this.buffer = input;
+ // valid is until the end of the read data, regardless of offset
+ // indicating where we start
+ this.valid = actualRead;
+ }
+
+ if (verifyChecksums) {
+ int actualCrc32c = Crc32C.maskedCrc32c(buffer, position, valid - position);
+ if (frameData.checkSum != actualCrc32c) {
+ throw new IOException("Corrupt input: invalid checksum");
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Grows {@link #input} and {@link #uncompressed} to at least {@code size} bytes; this method never shrinks a buffer.
+ */
+ private void allocateBuffersBasedOnSize(int size)
+ {
+ // a buffer that is already large enough is kept as is
+ input = (input.length < size) ? new byte[size] : input;
+ uncompressed = (uncompressed.length < size) ? new byte[size] : uncompressed;
+ }
+
+ /**
+ * Use the content of the frameHeader to describe what type of frame we have
+ * and the action to take.
+ */
+ private static FrameMetaData getFrameMetaData(byte[] frameHeader)
throws IOException
{
int length = (frameHeader[1] & 0xFF);
@@ -50,15 +242,15 @@ protected FrameMetaData getFrameMetaData(byte[] frameHeader)
FrameAction frameAction;
int flag = frameHeader[0] & 0xFF;
switch (flag) {
- case COMPRESSED_DATA_FLAG:
+ case SnappyFramed.COMPRESSED_DATA_FLAG:
frameAction = FrameAction.UNCOMPRESS;
minLength = 5;
break;
- case UNCOMPRESSED_DATA_FLAG:
+ case SnappyFramed.UNCOMPRESSED_DATA_FLAG:
frameAction = FrameAction.RAW;
minLength = 5;
break;
- case STREAM_IDENTIFIER_FLAG:
+ case SnappyFramed.STREAM_IDENTIFIER_FLAG:
if (length != 6) {
throw new IOException("stream identifier chunk with invalid length: " + length);
}
@@ -83,8 +275,13 @@ protected FrameMetaData getFrameMetaData(byte[] frameHeader)
return new FrameMetaData(frameAction, length);
}
- @Override
- protected FrameData getFrameData(byte[] frameHeader, byte[] content, int length)
+ /**
+ * Extract frame data
+ *
+ * @param content The content of the frame. Content begins at index {@code 0}.
+ * @return Metadata about the content of the frame.
+ */
+ private static FrameData getFrameData(byte[] content)
{
// crc is contained in the frame content
int crc32c = (content[3] & 0xFF) << 24 |
@@ -94,4 +291,53 @@ protected FrameData getFrameData(byte[] frameHeader, byte[] content, int length)
return new FrameData(crc32c, 4);
}
+
+ private boolean readBlockHeader()
+ throws IOException
+ {
+ int read = SnappyInternalUtils.readBytes(in, frameHeader, 0, frameHeader.length);
+ // -1 is treated as a clean end of stream: no further frame follows
+ if (read == -1) {
+ return false;
+ }
+ // any other count shorter than a full header means the stream was cut off mid-header
+ if (read < frameHeader.length) {
+ throw new EOFException("encountered EOF while reading block header");
+ }
+ // a full header's worth of bytes was read into frameHeader
+ return true;
+ }
+
+ private enum FrameAction
+ {
+ RAW, SKIP, UNCOMPRESS
+ }
+
+ private static final class FrameMetaData
+ {
+ final int length;
+ final FrameAction frameAction;
+
+ /**
+ * @param frameAction how the frame is to be handled (one of the {@link FrameAction} values)
+ * @param length number of bytes of frame content that follow the frame header
+ */
+ public FrameMetaData(FrameAction frameAction, int length)
+ {
+ this.frameAction = frameAction;
+ this.length = length;
+ }
+ }
+
+ private static final class FrameData
+ {
+ final int checkSum;
+ final int offset;
+
+ public FrameData(int checkSum, int offset)
+ {
+ this.checkSum = checkSum;
+ this.offset = offset;
+ }
+ }
}
diff --git a/src/main/java/org/iq80/snappy/SnappyFramedOutputStream.java b/src/main/java/org/iq80/snappy/SnappyFramedOutputStream.java
index e625ea7..fa18f40 100644
--- a/src/main/java/org/iq80/snappy/SnappyFramedOutputStream.java
+++ b/src/main/java/org/iq80/snappy/SnappyFramedOutputStream.java
@@ -20,16 +20,11 @@
import java.io.IOException;
import java.io.OutputStream;
-import static org.iq80.snappy.SnappyFramed.COMPRESSED_DATA_FLAG;
-import static org.iq80.snappy.SnappyFramed.HEADER_BYTES;
-import static org.iq80.snappy.SnappyFramed.UNCOMPRESSED_DATA_FLAG;
-import static org.iq80.snappy.SnappyInternalUtils.checkArgument;
-
/**
* Implements the x-snappy-framed as an {@link OutputStream}.
*/
public final class SnappyFramedOutputStream
- extends AbstractSnappyOutputStream
+ extends OutputStream
{
/**
* We place an additional restriction that the uncompressed data in
@@ -41,42 +36,217 @@ public final class SnappyFramedOutputStream
public static final int DEFAULT_BLOCK_SIZE = MAX_BLOCK_SIZE;
public static final double DEFAULT_MIN_COMPRESSION_RATIO = 0.85d;
+ private final int blockSize;
+ private final byte[] buffer;
+ private final byte[] outputBuffer;
+ private final double minCompressionRatio;
+ private final OutputStream out;
+ private final boolean writeChecksums;
+
+ private int position;
+ private boolean closed;
+ /**
+ * Creates a Snappy output stream to write data to the specified underlying output stream.
+ *
+ * @param out the underlying output stream
+ */
public SnappyFramedOutputStream(OutputStream out)
throws IOException
{
- this(out, DEFAULT_BLOCK_SIZE, DEFAULT_MIN_COMPRESSION_RATIO);
+ this(out, true);
+ }
+
+ /**
+ * Creates a Snappy output stream to write data to the specified underlying output stream.
+ *
+ * @param out the underlying output stream
+ */
+ public SnappyFramedOutputStream(OutputStream out, int blockSize, double minCompressionRatio)
+ throws IOException
+ {
+ this(out, true, blockSize, minCompressionRatio);
+ }
+
+ /**
+ * Creates a Snappy output stream with block checksums disabled. This is only useful for
+ * apples-to-apples benchmarks with other compressors that do not perform block checksums.
+ *
+ * @param out the underlying output stream
+ */
+ public static SnappyFramedOutputStream newChecksumFreeBenchmarkOutputStream(OutputStream out)
+ throws IOException
+ {
+ return new SnappyFramedOutputStream(out, false);
+ }
+
+ private SnappyFramedOutputStream(OutputStream out, boolean writeChecksums)
+ throws IOException
+ {
+ this(out, writeChecksums, DEFAULT_BLOCK_SIZE, DEFAULT_MIN_COMPRESSION_RATIO);
}
- public SnappyFramedOutputStream(OutputStream out, int blockSize,
- double minCompressionRatio)
+ private SnappyFramedOutputStream(OutputStream out, boolean writeChecksums, int blockSize, double minCompressionRatio)
throws IOException
{
- super(out, blockSize, minCompressionRatio);
- checkArgument(blockSize > 0 && blockSize <= MAX_BLOCK_SIZE, "blockSize must be in (0, 65536]", blockSize);
+ // validate all arguments first, so a bad value fails before buffers are allocated or the header is written
+ this.out = SnappyInternalUtils.checkNotNull(out, "out is null");
+ SnappyInternalUtils.checkArgument(minCompressionRatio > 0 && minCompressionRatio <= 1.0, "minCompressionRatio %1s must be between (0,1.0].", minCompressionRatio);
+ SnappyInternalUtils.checkArgument(blockSize > 0 && blockSize <= MAX_BLOCK_SIZE, "blockSize must be in (0, 65536]", blockSize);
+ this.writeChecksums = writeChecksums;
+ this.minCompressionRatio = minCompressionRatio;
+ this.blockSize = blockSize;
+ this.buffer = new byte[blockSize];
+ this.outputBuffer = new byte[Snappy.maxCompressedLength(blockSize)];
+ out.write(SnappyFramed.HEADER_BYTES);
}
@Override
- protected void writeHeader(OutputStream out)
+ public void write(int b)
throws IOException
{
- out.write(HEADER_BYTES);
+ if (closed) {
+ throw new IOException("Stream is closed");
+ }
+ if (position >= blockSize) {
+ flushBuffer();
+ }
+ buffer[position++] = (byte) b;
+ }
+
+ @Override
+ public void write(byte[] input, int offset, int length)
+ throws IOException
+ {
+ SnappyInternalUtils.checkNotNull(input, "input is null");
+ SnappyInternalUtils.checkPositionIndexes(offset, offset + length, input.length);
+ if (closed) {
+ throw new IOException("Stream is closed");
+ }
+
+ int free = blockSize - position;
+
+ // easy case: enough free space in buffer for entire input
+ if (free >= length) {
+ copyToBuffer(input, offset, length);
+ return;
+ }
+
+ // fill partial buffer as much as possible and flush
+ if (position > 0) {
+ copyToBuffer(input, offset, free);
+ flushBuffer();
+ offset += free;
+ length -= free;
+ }
+
+ // write remaining full blocks directly from input array
+ while (length >= blockSize) {
+ writeCompressed(input, offset, blockSize);
+ offset += blockSize;
+ length -= blockSize;
+ }
+
+ // copy remaining partial block into now-empty buffer
+ copyToBuffer(input, offset, length);
+ }
+
+ @Override
+ public void flush()
+ throws IOException
+ {
+ if (closed) {
+ throw new IOException("Stream is closed");
+ }
+ flushBuffer();
+ out.flush();
+ }
+
+ @Override
+ public void close()
+ throws IOException
+ {
+ if (closed) {
+ return;
+ }
+ // try-with-resources closes the underlying stream even when the final flush fails
+ try (OutputStream underlying = out) {
+ flush();
+ }
+ finally {
+ closed = true;
+ }
+ }
+
+ private void copyToBuffer(byte[] input, int offset, int length)
+ {
+ System.arraycopy(input, offset, buffer, position, length);
+ position += length;
}
/**
- * Each chunk consists first a single byte of chunk identifier, then a
- * three-byte little-endian length of the chunk in bytes (from 0 to
- * 16777215, inclusive), and then the data if any. The four bytes of chunk
- * header is not counted in the data length.
+ * Compresses and writes out any buffered data. This does nothing if there
+ * is no currently buffered data.
*/
- @Override
- protected void writeBlock(OutputStream out, byte[] data, int offset, int length, boolean compressed, int crc32c)
+ private void flushBuffer()
+ throws IOException
+ {
+ if (position > 0) {
+ writeCompressed(buffer, 0, position);
+ position = 0;
+ }
+ }
+
+ /**
+ * {@link Crc32C#maskedCrc32c(byte[], int, int) Calculates} the crc, compresses
+ * the data, determines if the compression ratio is acceptable and calls
+ * {@link #writeBlock(OutputStream, byte[], int, int, boolean, int)} to
+ * actually write the frame.
+ *
+ * @param input The byte[] containing the raw data to be compressed.
+ * @param offset The offset into input where the data starts.
+ * @param length The amount of data in input.
+ */
+ private void writeCompressed(byte[] input, int offset, int length)
+ throws IOException
+ {
+ // crc is based on the user supplied input data
+ int crc32c = writeChecksums ? Crc32C.maskedCrc32c(input, offset, length) : 0;
+
+ int compressed = Snappy.compress(input,
+ offset,
+ length,
+ outputBuffer,
+ 0);
+
+ // only use the compressed data if compression ratio is <= the minCompressionRatio
+ if (((double) compressed / (double) length) <= minCompressionRatio) {
+ writeBlock(out, outputBuffer, 0, compressed, true, crc32c);
+ }
+ else {
+ // otherwise use the uncompressed data.
+ writeBlock(out, input, offset, length, false, crc32c);
+ }
+ }
+
+ /**
+ * Write a frame (block) to out.
+ *
+ * @param out The {@link OutputStream} to write to.
+ * @param data The data to write.
+ * @param offset The offset in data to start at.
+ * @param length The length of data to use.
+ * @param compressed Indicates if data is the compressed or raw content.
+ * This is based on whether the compression ratio desired is
+ * reached.
+ * @param crc32c The calculated checksum.
+ */
+ private static void writeBlock(OutputStream out, byte[] data, int offset, int length, boolean compressed, int crc32c)
throws IOException
{
- out.write(compressed ? COMPRESSED_DATA_FLAG : UNCOMPRESSED_DATA_FLAG);
+ out.write(compressed ? SnappyFramed.COMPRESSED_DATA_FLAG : SnappyFramed.UNCOMPRESSED_DATA_FLAG);
- // the length written out to the header is both the checksum and the
- // frame
+ // the length written out to the header is both the checksum and the frame
int headerLength = length + 4;
// write length
diff --git a/src/main/java/org/iq80/snappy/SnappyInputStream.java b/src/main/java/org/iq80/snappy/SnappyInputStream.java
deleted file mode 100644
index 19afb92..0000000
--- a/src/main/java/org/iq80/snappy/SnappyInputStream.java
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (C) 2011 the original author or authors.
- * See the notice.md file distributed with this work for additional
- * information regarding copyright ownership.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.iq80.snappy;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-
-import static java.lang.String.format;
-import static org.iq80.snappy.SnappyOutputStream.MAX_BLOCK_SIZE;
-import static org.iq80.snappy.SnappyOutputStream.STREAM_HEADER;
-
-/**
- * This class implements an input stream for reading Snappy compressed data
- * of the format produced by {@link SnappyOutputStream}.
- *
- * NOTE:This implementation cannot read compressed data produced
- * by {@link SnappyFramedOutputStream}.
- *
- *
- * @deprecated Prefer the use of {@link SnappyFramedInputStream} which implements
- * the standard {@code x-snappy-framed} specification.
- */
-@Deprecated
-public class SnappyInputStream
- extends AbstractSnappyInputStream
-{
- private static final int HEADER_LENGTH = 7;
-
- /**
- * Creates a Snappy input stream to read data from the specified underlying input stream.
- *
- * @param in the underlying input stream
- */
- public SnappyInputStream(InputStream in)
- throws IOException
- {
- this(in, true);
- }
-
- /**
- * Creates a Snappy input stream to read data from the specified underlying input stream.
- *
- * @param in the underlying input stream
- * @param verifyChecksums if true, checksums in input stream will be verified
- */
- public SnappyInputStream(InputStream in, boolean verifyChecksums)
- throws IOException
- {
- super(in, MAX_BLOCK_SIZE, HEADER_LENGTH, verifyChecksums, STREAM_HEADER);
- }
-
- @Override
- protected FrameMetaData getFrameMetaData(byte[] frameHeader)
- throws IOException
- {
- int x = frameHeader[0] & 0xFF;
-
- int a = frameHeader[1] & 0xFF;
- int b = frameHeader[2] & 0xFF;
- int length = (a << 8) | b;
-
- FrameAction action;
- switch (x) {
- case 0x00:
- action = FrameAction.RAW;
- break;
- case 0x01:
- action = FrameAction.UNCOMPRESS;
- break;
- case 's':
- if (!Arrays.equals(STREAM_HEADER, frameHeader)) {
- throw new IOException(format("invalid compressed flag in header: 0x%02x", x));
- }
- action = FrameAction.SKIP;
- length = 0;
- break;
- default:
- throw new IOException(format("invalid compressed flag in header: 0x%02x", x));
- }
-
- if (((length <= 0) || (length > MAX_BLOCK_SIZE)) && action != FrameAction.SKIP) {
- throw new IOException("invalid block size in header: " + length);
- }
-
- return new FrameMetaData(action, length);
- }
-
- @Override
- protected FrameData getFrameData(byte[] frameHeader, byte[] content, int length)
- {
- // crc is contained in the frame header
- int crc32c = (frameHeader[3] & 0xFF) << 24 |
- (frameHeader[4] & 0xFF) << 16 |
- (frameHeader[5] & 0xFF) << 8 |
- (frameHeader[6] & 0xFF);
-
- return new FrameData(crc32c, 0);
- }
-}
diff --git a/src/main/java/org/iq80/snappy/SnappyInternalUtils.java b/src/main/java/org/iq80/snappy/SnappyInternalUtils.java
index b2b00c6..3249194 100644
--- a/src/main/java/org/iq80/snappy/SnappyInternalUtils.java
+++ b/src/main/java/org/iq80/snappy/SnappyInternalUtils.java
@@ -19,100 +19,10 @@
import java.io.IOException;
import java.io.InputStream;
-import java.nio.ByteOrder;
final class SnappyInternalUtils
{
- private SnappyInternalUtils()
- {
- }
-
- private static final Memory memory;
-
- static {
- // Try to only load one implementation of Memory to assure the call sites are monomorphic (fast)
- Memory memoryInstance = null;
-
- // TODO enable UnsafeMemory on big endian machines
- //
- // The current UnsafeMemory code assumes the machine is little endian, and will
- // not work correctly on big endian CPUs. For now, we will disable UnsafeMemory on
- // big endian machines. This will make the code significantly slower on big endian.
- // In the future someone should add the necessary flip bytes calls to make this
- // work efficiently on big endian machines.
- if (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN) {
- try {
- Class extends Memory> unsafeMemoryClass = SnappyInternalUtils.class.getClassLoader().loadClass("org.iq80.snappy.UnsafeMemory").asSubclass(Memory.class);
- Memory unsafeMemory = unsafeMemoryClass.newInstance();
- if (unsafeMemory.loadInt(new byte[4], 0) == 0) {
- memoryInstance = unsafeMemory;
- }
- }
- catch (Throwable ignored) {
- }
- }
- if (memoryInstance == null) {
- try {
- Class extends Memory> slowMemoryClass = SnappyInternalUtils.class.getClassLoader().loadClass("org.iq80.snappy.SlowMemory").asSubclass(Memory.class);
- Memory slowMemory = slowMemoryClass.newInstance();
- if (slowMemory.loadInt(new byte[4], 0) == 0) {
- memoryInstance = slowMemory;
- }
- else {
- throw new AssertionError("SlowMemory class is broken!");
- }
- }
- catch (Throwable ignored) {
- throw new AssertionError("Could not find SlowMemory class");
- }
- }
- memory = memoryInstance;
- }
-
- static final boolean HAS_UNSAFE = memory.fastAccessSupported();
-
- static boolean equals(byte[] left, int leftIndex, byte[] right, int rightIndex, int length)
- {
- checkPositionIndexes(leftIndex, leftIndex + length, left.length);
- checkPositionIndexes(rightIndex, rightIndex + length, right.length);
-
- for (int i = 0; i < length; i++) {
- if (left[leftIndex + i] != right[rightIndex + i]) {
- return false;
- }
- }
- return true;
- }
-
- public static int lookupShort(short[] data, int index)
- {
- return memory.lookupShort(data, index);
- }
-
- public static int loadByte(byte[] data, int index)
- {
- return memory.loadByte(data, index);
- }
-
- static int loadInt(byte[] data, int index)
- {
- return memory.loadInt(data, index);
- }
-
- static void copyLong(byte[] src, int srcIndex, byte[] dest, int destIndex)
- {
- memory.copyLong(src, srcIndex, dest, destIndex);
- }
-
- static long loadLong(byte[] data, int index)
- {
- return memory.loadLong(data, index);
- }
-
- static void copyMemory(byte[] input, int inputIndex, byte[] output, int outputIndex, int length)
- {
- memory.copyMemory(input, inputIndex, output, outputIndex, length);
- }
+ private SnappyInternalUtils() {}
//
// Copied from Guava Preconditions
@@ -140,7 +50,7 @@ static void checkPositionIndexes(int start, int end, int size)
}
}
- static String badPositionIndexes(int start, int end, int size)
+ private static String badPositionIndexes(int start, int end, int size)
{
if (start < 0 || start > size) {
return badPositionIndex(start, size, "start index");
@@ -152,7 +62,7 @@ static String badPositionIndexes(int start, int end, int size)
return String.format("end index (%s) must not be less than start index (%s)", end, start);
}
- static String badPositionIndex(int index, int size, String desc)
+ private static String badPositionIndex(int index, int size, String desc)
{
if (index < 0) {
return String.format("%s (%s) must not be negative", desc, index);
diff --git a/src/main/java/org/iq80/snappy/SnappyOutputStream.java b/src/main/java/org/iq80/snappy/SnappyOutputStream.java
deleted file mode 100644
index d6b3afc..0000000
--- a/src/main/java/org/iq80/snappy/SnappyOutputStream.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (C) 2011 the original author or authors.
- * See the notice.md file distributed with this work for additional
- * information regarding copyright ownership.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.iq80.snappy;
-
-import java.io.IOException;
-import java.io.OutputStream;
-
-/**
- * This class implements an output stream for writing Snappy compressed data.
- * The output format is the stream header "snappy\0" followed by one or more
- * compressed blocks of data, each of which is preceded by a seven byte header.
- *
- * The first byte of the header is a flag indicating if the block is compressed
- * or not. A value of 0x00 means uncompressed, and 0x01 means compressed.
- *
- * The second and third bytes are the size of the block in the stream as a big
- * endian number. This value is never zero as empty blocks are never written.
- * The maximum allowed length is 32k (1 << 15).
- *
- * The remaining four byes are crc32c checksum of the user input data masked
- * with the following function: {@code ((crc >>> 15) | (crc << 17)) + 0xa282ead8 }
- *
- * An uncompressed block is simply copied from the input, thus guaranteeing
- * that the output is never larger than the input (not including the header).
- *
- * NOTE:This data produced by this class is not compatible with the
- * {@code x-snappy-framed} specification. It can only be read by
- * {@link SnappyInputStream}.
- *
- *
- * @deprecated Use {@link SnappyFramedOutputStream} which implements
- * the standard {@code x-snappy-framed} specification.
- */
-@Deprecated
-public class SnappyOutputStream
- extends AbstractSnappyOutputStream
-{
- static final byte[] STREAM_HEADER = new byte[] {'s', 'n', 'a', 'p', 'p', 'y', 0};
-
- // the header format requires the max block size to fit in 15 bits -- do not change!
- static final int MAX_BLOCK_SIZE = 1 << 15;
-
- /**
- * Write out the uncompressed content if the compression ratio (compressed length / raw length) exceeds this value.
- */
- public static final double MIN_COMPRESSION_RATIO = 7.0 / 8.0;
-
- private final boolean calculateChecksum;
-
- /**
- * Creates a Snappy output stream to write data to the specified underlying output stream.
- *
- * @param out the underlying output stream
- */
- public SnappyOutputStream(OutputStream out)
- throws IOException
- {
- this(out, true);
- }
-
- private SnappyOutputStream(OutputStream out, boolean calculateChecksum)
- throws IOException
- {
- super(out, MAX_BLOCK_SIZE, MIN_COMPRESSION_RATIO);
- this.calculateChecksum = calculateChecksum;
- }
-
- /**
- * Creates a Snappy output stream with block checksums disabled. This is only useful for
- * apples-to-apples benchmarks with other compressors that do not perform block checksums.
- *
- * @param out the underlying output stream
- */
- public static SnappyOutputStream newChecksumFreeBenchmarkOutputStream(OutputStream out)
- throws IOException
- {
- return new SnappyOutputStream(out, false);
- }
-
- @Override
- protected void writeHeader(OutputStream out)
- throws IOException
- {
- out.write(STREAM_HEADER);
- }
-
- @Override
- protected int calculateCRC32C(byte[] data, int offset, int length)
- {
- return calculateChecksum ? super.calculateCRC32C(data, offset, length) : 0;
- }
-
- @Override
- protected void writeBlock(OutputStream out, byte[] data, int offset, int length, boolean compressed, int crc32c)
- throws IOException
- {
- // write compressed flag
- out.write(compressed ? 0x01 : 0x00);
-
- // write length
- out.write(length >>> 8);
- out.write(length);
-
- // write crc32c of user input data
- out.write(crc32c >>> 24);
- out.write(crc32c >>> 16);
- out.write(crc32c >>> 8);
- out.write(crc32c);
-
- // write data
- out.write(data, offset, length);
- }
-}
diff --git a/src/main/java/org/iq80/snappy/SnappyRawCompressor.java b/src/main/java/org/iq80/snappy/SnappyRawCompressor.java
new file mode 100644
index 0000000..b9ee582
--- /dev/null
+++ b/src/main/java/org/iq80/snappy/SnappyRawCompressor.java
@@ -0,0 +1,411 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.iq80.snappy;
+
+import java.util.Arrays;
+
+import static org.iq80.snappy.SnappyConstants.COPY_1_BYTE_OFFSET;
+import static org.iq80.snappy.SnappyConstants.COPY_2_BYTE_OFFSET;
+import static org.iq80.snappy.SnappyConstants.SIZE_OF_INT;
+import static org.iq80.snappy.SnappyConstants.SIZE_OF_LONG;
+import static org.iq80.snappy.SnappyConstants.SIZE_OF_SHORT;
+import static org.iq80.snappy.UnsafeUtil.UNSAFE;
+
+final class SnappyRawCompressor
+{
+ // The size of a compression block. Note that many parts of the compression
+ // code assume that BLOCK_SIZE <= 65536; in particular, the hash table
+ // can only store 16-bit offsets, and EmitCopy() also assumes the offset
+ // is 65535 bytes or less. Note also that if you change this, it will
+ // affect the framing format (see framing_format.txt).
+ //
+ // Note that there might be older data around that is compressed with larger
+ // block sizes, so the decompression code should not rely on the
+ // non-existence of long back-references.
+ private static final int BLOCK_LOG = 16;
+ private static final int BLOCK_SIZE = 1 << BLOCK_LOG;
+
+ private static final int INPUT_MARGIN_BYTES = 15;
+
+ private static final int MAX_HASH_TABLE_BITS = 14;
+ public static final int MAX_HASH_TABLE_SIZE = 1 << MAX_HASH_TABLE_BITS;
+
+ private SnappyRawCompressor() {}
+
+ public static int maxCompressedLength(int sourceLength)
+ {
+ // Compressed data can be defined as:
+ // compressed := item* literal*
+ // item := literal* copy
+ //
+ // The trailing literal sequence has a space blowup of at most 62/60
+ // since a literal of length 60 needs one tag byte + one extra byte
+ // for length information.
+ //
+ // Item blowup is trickier to measure. Suppose the "copy" op copies
+ // 4 bytes of data. Because of a special check in the encoding code,
+ // we produce a 4-byte copy only if the offset is < 65536. Therefore
+ // the copy op takes 3 bytes to encode, and this type of item leads
+ // to at most the 62/60 blowup for representing literals.
+ //
+ // Suppose the "copy" op copies 5 bytes of data. If the offset is big
+ // enough, it will take 5 bytes to encode the copy op. Therefore the
+ // worst case here is a one-byte literal followed by a five-byte copy.
+ // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
+ //
+ // This last factor dominates the blowup, so the final estimate is:
+ return 32 + sourceLength + sourceLength / 6;
+ }
+
+ // suppress warnings is required to use assert
+ @SuppressWarnings("IllegalToken")
+ public static int compress(
+ final Object inputBase,
+ final long inputAddress,
+ final long inputLimit,
+ final Object outputBase,
+ final long outputAddress,
+ final long outputLimit,
+ final short[] table)
+ {
+ // The compression code assumes output is larger than the max compression size (with 32 bytes of
+ // extra padding), and does not check bounds for writing to output.
+ int maxCompressedLength = maxCompressedLength((int) (inputLimit - inputAddress));
+ if (outputLimit - outputAddress < maxCompressedLength) {
+ throw new IllegalArgumentException("Output buffer must be at least " + maxCompressedLength + " bytes");
+ }
+
+ // First write the uncompressed size to the output as a variable length int
+ long output = writeUncompressedLength(outputBase, outputAddress, (int) (inputLimit - inputAddress));
+
+ for (long blockAddress = inputAddress; blockAddress < inputLimit; blockAddress += BLOCK_SIZE) {
+ final long blockLimit = Math.min(inputLimit, blockAddress + BLOCK_SIZE);
+ long input = blockAddress;
+ assert blockLimit - blockAddress <= BLOCK_SIZE;
+
+ int blockHashTableSize = getHashTableSize((int) (blockLimit - blockAddress));
+ Arrays.fill(table, 0, blockHashTableSize, (short) 0);
+
+ // todo given that hashTableSize is required to be a power of 2, this is overly complex
+ final int shift = 32 - log2Floor(blockHashTableSize);
+ assert (blockHashTableSize & (blockHashTableSize - 1)) == 0 : "table must be power of two";
+ assert 0xFFFFFFFF >>> shift == blockHashTableSize - 1;
+
+ // Bytes in [nextEmitAddress, input) will be emitted as literal bytes. Or
+ // [nextEmitAddress, inputLimit) after the main loop.
+ long nextEmitAddress = input;
+
+ final long fastInputLimit = blockLimit - INPUT_MARGIN_BYTES;
+ while (input <= fastInputLimit) {
+ assert nextEmitAddress <= input;
+
+ // The body of this loop emits a literal once and then emits a copy one
+ // or more times. (The exception is that when we're close to exhausting
+ // the input we exit and emit a literal.)
+ //
+ // In the first iteration of this loop we're just starting, so
+ // there's nothing to copy, so we must emit a literal once. And we
+ // only start a new iteration when the current iteration has determined
+ // that a literal will precede the next copy (if any).
+ //
+ // Step 1: Scan forward in the input looking for a 4-byte-long match.
+ // If we get close to exhausting the input exit and emit a final literal.
+ //
+ // Heuristic match skipping: If 32 bytes are scanned with no matches
+ // found, start looking only at every other byte. If 32 more bytes are
+ // scanned, look at every third byte, etc. When a match is found,
+ // immediately go back to looking at every byte. This is a small loss
+ // (~5% performance, ~0.1% density) for compressible data due to more
+ // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
+ // win since the compressor quickly "realizes" the data is incompressible
+ // and doesn't bother looking for matches everywhere.
+ //
+ // The "skip" variable keeps track of how many bytes there are since the
+ // last match; dividing it by 32 (i.e. right-shifting by five) gives the
+ // number of bytes to move ahead for each iteration.
+ int skip = 32;
+
+ long candidateIndex = 0;
+ for (input += 1; input + (skip >>> 5) <= fastInputLimit; input += ((skip++) >>> 5)) {
+ // hash the 4 bytes starting at the input pointer
+ int currentInt = UNSAFE.getInt(inputBase, input);
+ int hash = hashBytes(currentInt, shift);
+
+ // get the position of a 4 bytes sequence with the same hash
+ candidateIndex = blockAddress + (table[hash] & 0xFFFF);
+ assert candidateIndex >= 0;
+ assert candidateIndex < input;
+
+ // update the hash to point to the current position
+ table[hash] = (short) (input - blockAddress);
+
+ // if the 4 byte sequence at the candidate index matches the sequence at the
+ // current position, proceed to the next phase
+ if (currentInt == UNSAFE.getInt(inputBase, candidateIndex)) {
+ break;
+ }
+ }
+ if (input + (skip >>> 5) > fastInputLimit) {
+ break;
+ }
+
+ // Step 2: A 4-byte match has been found. We'll later see if more
+ // than 4 bytes match. But, prior to the match, input
+ // bytes [nextEmitAddress, input) are unmatched. Emit them as "literal bytes."
+ assert nextEmitAddress + 16 <= blockLimit;
+
+ int literalLength = (int) (input - nextEmitAddress);
+ output = emitLiteralLength(outputBase, output, literalLength);
+
+ // Fast copy can use 8 extra bytes of input and output, which is safe because:
+ // - The input will always have INPUT_MARGIN_BYTES = 15 extra available bytes
+ // - The output will always have 32 spare bytes (see maxCompressedLength).
+ output = fastCopy(inputBase, nextEmitAddress, outputBase, output, literalLength);
+
+ // Step 3: Call EmitCopy, and then see if another EmitCopy could
+ // be our next move. Repeat until we find no match for the
+ // input immediately after what was consumed by the last EmitCopy call.
+ //
+ // If we exit this loop normally then we need to call EmitLiteral next,
+ // though we don't yet know how big the literal will be. We handle that
+ // by proceeding to the next iteration of the main loop. We also can exit
+ // this loop via break if we get close to exhausting the input.
+ int inputBytes;
+ do {
+ // We have a 4-byte match at input, and no need to emit any
+ // "literal bytes" prior to input.
+ assert (blockLimit >= input + SIZE_OF_INT);
+
+ // determine match length
+ int matched = count(inputBase, input + SIZE_OF_INT, candidateIndex + SIZE_OF_INT, blockLimit);
+ matched += SIZE_OF_INT;
+
+ // Emit the copy operation for this chunk
+ output = emitCopy(outputBase, output, input, candidateIndex, matched);
+ input += matched;
+
+ // are we done?
+ if (input >= fastInputLimit) {
+ break;
+ }
+
+ // We could immediately start working at input now, but to improve
+ // compression we first update table[hashBytes(<4 bytes at input - 1>, shift)].
+ long longValue = UNSAFE.getLong(inputBase, input - 1);
+ int prevInt = (int) longValue;
+ inputBytes = (int) (longValue >>> 8);
+
+ // add hash starting with previous byte
+ int prevHash = hashBytes(prevInt, shift);
+ table[prevHash] = (short) (input - blockAddress - 1);
+
+ // update hash of current byte
+ int curHash = hashBytes(inputBytes, shift);
+
+ candidateIndex = blockAddress + (table[curHash] & 0xFFFF);
+ table[curHash] = (short) (input - blockAddress);
+ } while (inputBytes == UNSAFE.getInt(inputBase, candidateIndex));
+ nextEmitAddress = input;
+ }
+
+ // Emit the remaining bytes as a literal
+ if (nextEmitAddress < blockLimit) {
+ int literalLength = (int) (blockLimit - nextEmitAddress);
+ output = emitLiteralLength(outputBase, output, literalLength);
+ UNSAFE.copyMemory(inputBase, nextEmitAddress, outputBase, output, literalLength);
+ output += literalLength;
+ }
+ }
+
+ return (int) (output - outputAddress);
+ }
+
+ private static int count(Object inputBase, final long start, long matchStart, long matchLimit)
+ {
+ long current = start;
+
+ // first, compare long at a time
+ while (current < matchLimit - (SIZE_OF_LONG - 1)) {
+ long diff = UNSAFE.getLong(inputBase, matchStart) ^ UNSAFE.getLong(inputBase, current);
+ if (diff != 0) {
+ current += Long.numberOfTrailingZeros(diff) >> 3;
+ return (int) (current - start);
+ }
+
+ current += SIZE_OF_LONG;
+ matchStart += SIZE_OF_LONG;
+ }
+
+ if (current < matchLimit - (SIZE_OF_INT - 1) && UNSAFE.getInt(inputBase, matchStart) == UNSAFE.getInt(inputBase, current)) {
+ current += SIZE_OF_INT;
+ matchStart += SIZE_OF_INT;
+ }
+
+ if (current < matchLimit - (SIZE_OF_SHORT - 1) && UNSAFE.getShort(inputBase, matchStart) == UNSAFE.getShort(inputBase, current)) {
+ current += SIZE_OF_SHORT;
+ matchStart += SIZE_OF_SHORT;
+ }
+
+ if (current < matchLimit && UNSAFE.getByte(inputBase, matchStart) == UNSAFE.getByte(inputBase, current)) {
+ ++current;
+ }
+
+ return (int) (current - start);
+ }
+
+ private static long emitLiteralLength(Object outputBase, long output, int literalLength)
+ {
+ int n = literalLength - 1; // Zero-length literals are disallowed
+ if (n < 60) {
+ // Size fits in tag byte
+ UNSAFE.putByte(outputBase, output++, (byte) (n << 2));
+ }
+ else {
+ int bytes;
+ if (n < (1 << 8)) {
+ UNSAFE.putByte(outputBase, output++, (byte) (59 + 1 << 2));
+ bytes = 1;
+ }
+ else if (n < (1 << 16)) {
+ UNSAFE.putByte(outputBase, output++, (byte) (59 + 2 << 2));
+ bytes = 2;
+ }
+ else if (n < (1 << 24)) {
+ UNSAFE.putByte(outputBase, output++, (byte) (59 + 3 << 2));
+ bytes = 3;
+ }
+ else {
+ UNSAFE.putByte(outputBase, output++, (byte) (59 + 4 << 2));
+ bytes = 4;
+ }
+ // System is assumed to be little endian, so the low-order bytes of n are written first and the high bytes are zero for the smaller numbers
+ UNSAFE.putInt(outputBase, output, n);
+ output += bytes;
+ }
+ return output;
+ }
+
+ private static long fastCopy(final Object inputBase, long input, final Object outputBase, long output, final int literalLength)
+ {
+ final long outputLimit = output + literalLength;
+ do {
+ UNSAFE.putLong(outputBase, output, UNSAFE.getLong(inputBase, input));
+ input += SIZE_OF_LONG;
+ output += SIZE_OF_LONG;
+ }
+ while (output < outputLimit);
+ return outputLimit;
+ }
+
+ private static long emitCopy(Object outputBase, long output, long input, long matchIndex, int matchLength)
+ {
+ long offset = input - matchIndex;
+
+ // Emit 64 byte copies but make sure to keep at least four bytes reserved
+ while (matchLength >= 68) {
+ UNSAFE.putByte(outputBase, output++, (byte) (COPY_2_BYTE_OFFSET + ((64 - 1) << 2)));
+ UNSAFE.putShort(outputBase, output, (short) offset);
+ output += SIZE_OF_SHORT;
+ matchLength -= 64;
+ }
+
+ // Emit an extra 60 byte copy if we have too much data to fit in one copy
+ // length < 68
+ if (matchLength > 64) {
+ UNSAFE.putByte(outputBase, output++, (byte) (COPY_2_BYTE_OFFSET + ((60 - 1) << 2)));
+ UNSAFE.putShort(outputBase, output, (short) offset);
+ output += SIZE_OF_SHORT;
+ matchLength -= 60;
+ }
+
+ // Emit remainder
+ if ((matchLength < 12) && (offset < 2048)) {
+ int lenMinus4 = matchLength - 4;
+ UNSAFE.putByte(outputBase, output++, (byte) (COPY_1_BYTE_OFFSET + ((lenMinus4) << 2) + ((offset >>> 8) << 5)));
+ UNSAFE.putByte(outputBase, output++, (byte) (offset));
+ }
+ else {
+ UNSAFE.putByte(outputBase, output++, (byte) (COPY_2_BYTE_OFFSET + ((matchLength - 1) << 2)));
+ UNSAFE.putShort(outputBase, output, (short) offset);
+ output += SIZE_OF_SHORT;
+ }
+ return output;
+ }
+
+ @SuppressWarnings("IllegalToken")
+ private static int getHashTableSize(int inputSize)
+ {
+ // Use smaller hash table when input.size() is smaller, since we
+ // fill the table, incurring O(hash table size) overhead for
+ // compression, and if the input is short, we won't need that
+ // many hash table entries anyway.
+ assert (MAX_HASH_TABLE_SIZE >= 256);
+
+ // smallest power of 2 that is at least inputSize (for inputSize >= 2)
+ int target = Integer.highestOneBit(inputSize - 1) << 1;
+
+ // keep it between 256 and MAX_HASH_TABLE_SIZE
+ return Math.max(Math.min(target, MAX_HASH_TABLE_SIZE), 256);
+ }
+
+ // Any hash function will produce a valid compressed stream, but a good
+ // hash function reduces the number of collisions and thus yields better
+ // compression for compressible input, and more speed for incompressible
+ // input. Of course, it doesn't hurt if the hash function is reasonably fast
+ // either, as it gets called a lot.
+ private static int hashBytes(int value, int shift)
+ {
+ return (value * 0x1e35a7bd) >>> shift;
+ }
+
+ private static int log2Floor(int n)
+ {
+ return n == 0 ? -1 : 31 ^ Integer.numberOfLeadingZeros(n);
+ }
+
+ private static final int HIGH_BIT_MASK = 0x80;
+
+ /**
+ * Writes the uncompressed length as variable length integer.
+ */
+ private static long writeUncompressedLength(Object outputBase, long outputAddress, int uncompressedLength)
+ {
+ if (uncompressedLength < (1 << 7) && uncompressedLength >= 0) {
+ UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength));
+ }
+ else if (uncompressedLength < (1 << 14) && uncompressedLength > 0) {
+ UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength | HIGH_BIT_MASK));
+ UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength >>> 7));
+ }
+ else if (uncompressedLength < (1 << 21) && uncompressedLength > 0) {
+ UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength | HIGH_BIT_MASK));
+ UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 7) | HIGH_BIT_MASK));
+ UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength >>> 14));
+ }
+ else if (uncompressedLength < (1 << 28) && uncompressedLength > 0) {
+ UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength | HIGH_BIT_MASK));
+ UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 7) | HIGH_BIT_MASK));
+ UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 14) | HIGH_BIT_MASK));
+ UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength >>> 21));
+ }
+ else {
+ UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength | HIGH_BIT_MASK));
+ UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 7) | HIGH_BIT_MASK));
+ UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 14) | HIGH_BIT_MASK));
+ UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 21) | HIGH_BIT_MASK));
+ UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength >>> 28));
+ }
+ return outputAddress;
+ }
+}
diff --git a/src/main/java/org/iq80/snappy/SnappyRawDecompressor.java b/src/main/java/org/iq80/snappy/SnappyRawDecompressor.java
new file mode 100644
index 0000000..4769170
--- /dev/null
+++ b/src/main/java/org/iq80/snappy/SnappyRawDecompressor.java
@@ -0,0 +1,320 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.iq80.snappy;
+
+import static org.iq80.snappy.SnappyConstants.LITERAL;
+import static org.iq80.snappy.SnappyConstants.SIZE_OF_INT;
+import static org.iq80.snappy.SnappyConstants.SIZE_OF_LONG;
+import static org.iq80.snappy.UnsafeUtil.UNSAFE;
+
+final class SnappyRawDecompressor
+{
+ private static final int[] DEC_32_TABLE = {4, 1, 2, 1, 4, 4, 4, 4};
+ private static final int[] DEC_64_TABLE = {0, 0, 0, -1, 0, 1, 2, 3};
+
+ private SnappyRawDecompressor() {}
+
+ public static int getUncompressedLength(Object compressed, long compressedAddress, long compressedLimit)
+ {
+ return readUncompressedLength(compressed, compressedAddress, compressedLimit)[0];
+ }
+
+ public static int decompress(
+ final Object inputBase,
+ final long inputAddress,
+ final long inputLimit,
+ final Object outputBase,
+ final long outputAddress,
+ final long outputLimit)
+ {
+ // Read the uncompressed length from the front of the input
+ long input = inputAddress;
+ int[] varInt = readUncompressedLength(inputBase, input, inputLimit);
+ int expectedLength = varInt[0];
+ input += varInt[1];
+
+ SnappyInternalUtils.checkArgument(expectedLength <= (outputLimit - outputAddress),
+ "Uncompressed length %s must be less than %s", expectedLength, (outputLimit - outputAddress));
+
+ // Process the entire input
+ int uncompressedSize = uncompressAll(
+ inputBase,
+ input,
+ inputLimit,
+ outputBase,
+ outputAddress,
+ outputLimit);
+
+ if (!(expectedLength == uncompressedSize)) {
+ throw new CorruptionException(0, String.format("Recorded length is %s bytes but actual length after decompression is %s bytes ",
+ expectedLength,
+ uncompressedSize));
+ }
+
+ return expectedLength;
+ }
+
+ private static int uncompressAll(
+ final Object inputBase,
+ final long inputAddress,
+ final long inputLimit,
+ final Object outputBase,
+ final long outputAddress,
+ final long outputLimit)
+ {
+ final long fastOutputLimit = outputLimit - SIZE_OF_LONG; // maximum offset in output buffer to which it's safe to write long-at-a-time
+
+ long output = outputAddress;
+ long input = inputAddress;
+
+ while (input < inputLimit) {
+ int opCode = UNSAFE.getByte(inputBase, input++) & 0xFF;
+ int entry = opLookupTable[opCode] & 0xFFFF;
+
+ int trailerBytes = entry >>> 11;
+ int trailer = 0;
+ if (input + SIZE_OF_INT < inputLimit) {
+ trailer = UNSAFE.getInt(inputBase, input) & wordmask[trailerBytes];
+ }
+ else {
+ if (input + trailerBytes > inputLimit) {
+ throw new CorruptionException(input - inputAddress);
+ }
+ switch (trailerBytes) {
+ case 4:
+ trailer = (UNSAFE.getByte(inputBase, input + 3) & 0xff) << 24;
+ case 3:
+ trailer |= (UNSAFE.getByte(inputBase, input + 2) & 0xff) << 16;
+ case 2:
+ trailer |= (UNSAFE.getByte(inputBase, input + 1) & 0xff) << 8;
+ case 1:
+ trailer |= (UNSAFE.getByte(inputBase, input) & 0xff);
+ }
+ }
+ if (trailer < 0) {
+ throw new CorruptionException(input - inputAddress);
+ }
+ input += trailerBytes;
+
+ int length = entry & 0xff;
+ if (length == 0) {
+ continue;
+ }
+
+ if ((opCode & 0x3) == LITERAL) {
+ int literalLength = length + trailer;
+ if (literalLength < 0) {
+ throw new CorruptionException(input - inputAddress);
+ }
+
+ // copy literal
+ long literalOutputLimit = output + literalLength;
+ if (literalOutputLimit > fastOutputLimit || input + literalLength > inputLimit - SIZE_OF_LONG) {
+ if (literalOutputLimit > outputLimit || input + literalLength > inputLimit) {
+ throw new CorruptionException(input - inputAddress);
+ }
+
+ // slow, precise copy
+ UNSAFE.copyMemory(inputBase, input, outputBase, output, literalLength);
+ input += literalLength;
+ output += literalLength;
+ }
+ else {
+ // fast copy. We may over-copy but there's enough room in input and output to not overrun them
+ do {
+ UNSAFE.putLong(outputBase, output, UNSAFE.getLong(inputBase, input));
+ input += SIZE_OF_LONG;
+ output += SIZE_OF_LONG;
+ }
+ while (output < literalOutputLimit);
+ input -= (output - literalOutputLimit); // adjust index if we over-copied
+ output = literalOutputLimit;
+ }
+ }
+ else {
+ // matchOffset/256 is encoded in bits 8..10. By just fetching
+ // those bits, we get matchOffset (since the bit-field starts at
+ // bit 8).
+ int matchOffset = entry & 0x700;
+ matchOffset += trailer;
+ if (matchOffset < 0) {
+ throw new CorruptionException(input - inputAddress);
+ }
+
+ long matchAddress = output - matchOffset;
+ if (matchAddress < outputAddress || output + length > outputLimit) {
+ throw new CorruptionException(input - inputAddress);
+ }
+ long matchOutputLimit = output + length;
+ if (matchOutputLimit > outputLimit) {
+ throw new CorruptionException(input - inputAddress);
+ }
+
+ if (output > fastOutputLimit) {
+ // slow match copy
+ while (output < matchOutputLimit) {
+ UNSAFE.putByte(outputBase, output++, UNSAFE.getByte(outputBase, matchAddress++));
+ }
+ }
+ else {
+ // copy repeated sequence
+ if (matchOffset < SIZE_OF_LONG) {
+ // 8 bytes apart so that we can copy long-at-a-time below
+ int increment32 = DEC_32_TABLE[matchOffset];
+ int decrement64 = DEC_64_TABLE[matchOffset];
+
+ UNSAFE.putByte(outputBase, output, UNSAFE.getByte(outputBase, matchAddress));
+ UNSAFE.putByte(outputBase, output + 1, UNSAFE.getByte(outputBase, matchAddress + 1));
+ UNSAFE.putByte(outputBase, output + 2, UNSAFE.getByte(outputBase, matchAddress + 2));
+ UNSAFE.putByte(outputBase, output + 3, UNSAFE.getByte(outputBase, matchAddress + 3));
+ output += SIZE_OF_INT;
+ matchAddress += increment32;
+
+ UNSAFE.putInt(outputBase, output, UNSAFE.getInt(outputBase, matchAddress));
+ output += SIZE_OF_INT;
+ matchAddress -= decrement64;
+ }
+ else {
+ UNSAFE.putLong(outputBase, output, UNSAFE.getLong(outputBase, matchAddress));
+ matchAddress += SIZE_OF_LONG;
+ output += SIZE_OF_LONG;
+ }
+
+ if (matchOutputLimit > fastOutputLimit) {
+ while (output < fastOutputLimit) {
+ UNSAFE.putLong(outputBase, output, UNSAFE.getLong(outputBase, matchAddress));
+ matchAddress += SIZE_OF_LONG;
+ output += SIZE_OF_LONG;
+ }
+
+ while (output < matchOutputLimit) {
+ UNSAFE.putByte(outputBase, output++, UNSAFE.getByte(outputBase, matchAddress++));
+ }
+ }
+ else {
+ while (output < matchOutputLimit) {
+ UNSAFE.putLong(outputBase, output, UNSAFE.getLong(outputBase, matchAddress));
+ matchAddress += SIZE_OF_LONG;
+ output += SIZE_OF_LONG;
+ }
+ }
+ }
+ output = matchOutputLimit; // correction in case we over-copied
+ }
+ }
+
+ return (int) (output - outputAddress);
+ }
+
+ // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
+ private static final int[] wordmask = new int[] {
+ 0, 0xff, 0xffff, 0xffffff, 0xffffffff
+ };
+
+ // Data stored per entry in lookup table:
+ // Range Bits-used Description
+ // ------------------------------------
+ // 1..64 0..7 Literal/copy length encoded in opcode byte
+ // 0..7 8..10 Copy offset encoded in opcode byte / 256
+ // 0..4 11..13 Extra bytes after opcode
+ //
+ // We use eight bits for the length even though 7 would have sufficed
+ // because of efficiency reasons:
+ // (1) Extracting a byte is faster than a bit-field
+ // (2) It properly aligns copy offset so we do not need a <<8
+ private static final short[] opLookupTable = new short[] {
+ 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
+ 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
+ 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
+ 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
+ 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
+ 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
+ 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
+ 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
+ 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
+ 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
+ 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
+ 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
+ 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
+ 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
+ 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
+ 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
+ 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
+ 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
+ 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
+ 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
+ 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
+ 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
+ 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
+ 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
+ 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
+ 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
+ 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
+ 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
+ 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
+ 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
+ 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
+ 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
+ };
+
+ /**
+ * Reads the variable length integer encoded at the specified offset, and
+ * returns this length with the number of bytes read.
+ */
+ private static int[] readUncompressedLength(Object compressed, long compressedAddress, long compressedLimit)
+ {
+ int result;
+ int bytesRead = 0;
+ {
+ int b = getUnsignedByteSafe(compressed, compressedAddress + bytesRead, compressedLimit);
+ bytesRead++;
+ result = b & 0x7f;
+ if ((b & 0x80) != 0) {
+ b = getUnsignedByteSafe(compressed, compressedAddress + bytesRead, compressedLimit);
+ bytesRead++;
+ result |= (b & 0x7f) << 7;
+ if ((b & 0x80) != 0) {
+ b = getUnsignedByteSafe(compressed, compressedAddress + bytesRead, compressedLimit);
+ bytesRead++;
+ result |= (b & 0x7f) << 14;
+ if ((b & 0x80) != 0) {
+ b = getUnsignedByteSafe(compressed, compressedAddress + bytesRead, compressedLimit);
+ bytesRead++;
+ result |= (b & 0x7f) << 21;
+ if ((b & 0x80) != 0) {
+ b = getUnsignedByteSafe(compressed, compressedAddress + bytesRead, compressedLimit);
+ bytesRead++;
+ result |= (b & 0x7f) << 28;
+ if ((b & 0x80) != 0) {
+ throw new CorruptionException(compressedAddress + bytesRead, "last byte of compressed length int has high bit set");
+ }
+ }
+ }
+ }
+ }
+ }
+ if (result < 0) {
+ throw new CorruptionException(compressedAddress, "negative compressed length");
+ }
+ return new int[] {result, bytesRead};
+ }
+
+ private static int getUnsignedByteSafe(Object base, long address, long limit)
+ {
+ if (address >= limit) {
+ throw new CorruptionException(limit - address, "Input is truncated");
+ }
+ return UNSAFE.getByte(base, address) & 0xFF;
+ }
+}
diff --git a/src/main/java/org/iq80/snappy/UnsafeMemory.java b/src/main/java/org/iq80/snappy/UnsafeMemory.java
deleted file mode 100644
index 478ca97..0000000
--- a/src/main/java/org/iq80/snappy/UnsafeMemory.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (C) 2011 the original author or authors.
- * See the notice.md file distributed with this work for additional
- * information regarding copyright ownership.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.iq80.snappy;
-
-import sun.misc.Unsafe;
-
-import java.lang.reflect.Field;
-
-class UnsafeMemory
- implements Memory
-{
- private static final Unsafe unsafe;
-
- static {
- try {
- Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe");
- theUnsafe.setAccessible(true);
- unsafe = (Unsafe) theUnsafe.get(null);
- // It seems not all Unsafe implementations implement the following method.
- new UnsafeMemory().copyMemory(new byte[1], 0, new byte[1], 0, 1);
- }
- catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- private static final long BYTE_ARRAY_OFFSET = unsafe.arrayBaseOffset(byte[].class);
- private static final long SHORT_ARRAY_OFFSET = unsafe.arrayBaseOffset(short[].class);
- private static final long SHORT_ARRAY_STRIDE = unsafe.arrayIndexScale(short[].class);
-
- @Override
- public boolean fastAccessSupported()
- {
- return true;
- }
-
- @Override
- public int lookupShort(short[] data, int index)
- {
- assert index >= 0;
- assert index <= data.length;
- return unsafe.getShort(data, SHORT_ARRAY_OFFSET + (index * SHORT_ARRAY_STRIDE)) & 0xFFFF;
- }
-
- @Override
- public int loadByte(byte[] data, int index)
- {
- assert index >= 0;
- assert index <= data.length;
- return unsafe.getByte(data, BYTE_ARRAY_OFFSET + index) & 0xFF;
- }
-
- @Override
- public int loadInt(byte[] data, int index)
- {
- assert index >= 0;
- assert index + 4 <= data.length;
- return unsafe.getInt(data, BYTE_ARRAY_OFFSET + index);
- }
-
- @Override
- public void copyLong(byte[] src, int srcIndex, byte[] dest, int destIndex)
- {
- assert srcIndex >= 0;
- assert srcIndex + 8 <= src.length;
- assert destIndex >= 0;
- assert destIndex + 8 <= dest.length;
- long value = unsafe.getLong(src, BYTE_ARRAY_OFFSET + srcIndex);
- unsafe.putLong(dest, (BYTE_ARRAY_OFFSET + destIndex), value);
- }
-
- @Override
- public long loadLong(byte[] data, int index)
- {
- assert index > 0;
- assert index + 4 < data.length;
- return unsafe.getLong(data, BYTE_ARRAY_OFFSET + index);
- }
-
- @Override
- public void copyMemory(byte[] input, int inputIndex, byte[] output, int outputIndex, int length)
- {
- assert inputIndex >= 0;
- assert inputIndex + length <= input.length;
- assert outputIndex >= 0;
- assert outputIndex + length <= output.length;
- unsafe.copyMemory(input, BYTE_ARRAY_OFFSET + inputIndex, output, BYTE_ARRAY_OFFSET + outputIndex, length);
- }
-}
diff --git a/src/main/java/org/iq80/snappy/UnsafeUtil.java b/src/main/java/org/iq80/snappy/UnsafeUtil.java
new file mode 100644
index 0000000..f102c01
--- /dev/null
+++ b/src/main/java/org/iq80/snappy/UnsafeUtil.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.iq80.snappy;
+
+import sun.misc.Unsafe;
+
+import java.lang.reflect.Field;
+import java.nio.ByteOrder;
+
+import static java.lang.String.format;
+
+/**
+ * Holder for the shared {@code sun.misc.Unsafe} instance.
+ * <p>
+ * Class initialization fails with {@link IncompatibleJvmException} when the
+ * platform is not little endian or when the {@code theUnsafe} field cannot be
+ * read reflectively.
+ */
+final class UnsafeUtil
+{
+    public static final Unsafe UNSAFE;
+
+    private UnsafeUtil() {}
+
+    static {
+        // Refuse to initialize on anything other than a little endian platform.
+        ByteOrder order = ByteOrder.nativeOrder();
+        if (!order.equals(ByteOrder.LITTLE_ENDIAN)) {
+            throw new IncompatibleJvmException(format("Snappy requires a little endian platform (found %s)", order));
+        }
+
+        try {
+            // theUnsafe is read as a static field (get(null)) after forcing accessibility.
+            Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe");
+            theUnsafe.setAccessible(true);
+            UNSAFE = (Unsafe) theUnsafe.get(null);
+        }
+        catch (Exception e) {
+            // Attach the underlying failure so the reason Unsafe is unavailable is not lost.
+            IncompatibleJvmException failure = new IncompatibleJvmException("Snappy requires access to sun.misc.Unsafe");
+            failure.initCause(e);
+            throw failure;
+        }
+    }
+}
diff --git a/src/test/java/org/iq80/snappy/BenchmarkDriver.java b/src/test/java/org/iq80/snappy/BenchmarkDriver.java
index a4c0034..5d7f2a5 100644
--- a/src/test/java/org/iq80/snappy/BenchmarkDriver.java
+++ b/src/test/java/org/iq80/snappy/BenchmarkDriver.java
@@ -225,7 +225,7 @@ public long compress(TestData testData, long iterations)
long start = System.nanoTime();
while (iterations-- > 0) {
rawOut.reset();
- SnappyOutputStream out = SnappyOutputStream.newChecksumFreeBenchmarkOutputStream(rawOut);
+ SnappyFramedOutputStream out = SnappyFramedOutputStream.newChecksumFreeBenchmarkOutputStream(rawOut);
out.write(contents);
out.close();
}
@@ -248,7 +248,7 @@ public long uncompress(TestData testData, long iterations)
byte[] contents = testData.getContents();
ByteArrayOutputStream compressedStream = new ByteArrayOutputStream(Snappy.maxCompressedLength(contents.length));
- SnappyOutputStream out = SnappyOutputStream.newChecksumFreeBenchmarkOutputStream(compressedStream);
+ SnappyFramedOutputStream out = SnappyFramedOutputStream.newChecksumFreeBenchmarkOutputStream(compressedStream);
out.write(contents);
out.close();
byte[] compressed = compressedStream.toByteArray();
@@ -259,7 +259,7 @@ public long uncompress(TestData testData, long iterations)
long start = System.nanoTime();
while (iterations-- > 0) {
ByteArrayInputStream compIn = new ByteArrayInputStream(compressed);
- SnappyInputStream in = new SnappyInputStream(compIn, false);
+ SnappyFramedInputStream in = new SnappyFramedInputStream(compIn, false);
while (in.read(inputBuffer) >= 0) {
}
@@ -287,12 +287,12 @@ public long roundTrip(TestData testData, long iterations)
long start = System.nanoTime();
while (iterations-- > 0) {
compressedStream.reset();
- SnappyOutputStream out = SnappyOutputStream.newChecksumFreeBenchmarkOutputStream(compressedStream);
+ SnappyFramedOutputStream out = SnappyFramedOutputStream.newChecksumFreeBenchmarkOutputStream(compressedStream);
out.write(contents);
out.close();
ByteArrayInputStream compIn = new ByteArrayInputStream(compressedStream.getBuffer(), 0, compressedStream.size());
- SnappyInputStream in = new SnappyInputStream(compIn, false);
+ SnappyFramedInputStream in = new SnappyFramedInputStream(compIn, false);
while (in.read(inputBuffer) >= 0) {
}
@@ -314,7 +314,7 @@ public double getCompressionRatio(TestData testData)
int compressedSize;
try {
ByteArrayOutputStream rawOut = new ByteArrayOutputStream(Snappy.maxCompressedLength(contents.length));
- SnappyOutputStream out = SnappyOutputStream.newChecksumFreeBenchmarkOutputStream(rawOut);
+ SnappyFramedOutputStream out = SnappyFramedOutputStream.newChecksumFreeBenchmarkOutputStream(rawOut);
out.write(contents);
out.close();
diff --git a/src/test/java/org/iq80/snappy/SnappyBench.java b/src/test/java/org/iq80/snappy/SnappyBench.java
index 8674c56..46aa22a 100644
--- a/src/test/java/org/iq80/snappy/SnappyBench.java
+++ b/src/test/java/org/iq80/snappy/SnappyBench.java
@@ -116,11 +116,11 @@ public void verify()
byte[] contents = testData.getContents();
ByteArrayOutputStream rawOut = new ByteArrayOutputStream(Snappy.maxCompressedLength(contents.length));
- SnappyOutputStream out = new SnappyOutputStream(rawOut);
+ SnappyFramedOutputStream out = new SnappyFramedOutputStream(rawOut);
out.write(contents);
out.close();
- SnappyInputStream in = new SnappyInputStream(new ByteArrayInputStream(rawOut.toByteArray()));
+ SnappyFramedInputStream in = new SnappyFramedInputStream(new ByteArrayInputStream(rawOut.toByteArray()));
byte[] uncompressed = ByteStreams.toByteArray(in);
if (!Arrays.equals(uncompressed, testData.getContents())) {
diff --git a/src/test/java/org/iq80/snappy/SnappyFramedStreamTest.java b/src/test/java/org/iq80/snappy/SnappyFramedStreamTest.java
index 67e4e2e..5a2e004 100644
--- a/src/test/java/org/iq80/snappy/SnappyFramedStreamTest.java
+++ b/src/test/java/org/iq80/snappy/SnappyFramedStreamTest.java
@@ -18,8 +18,6 @@
package org.iq80.snappy;
import com.google.common.base.Charsets;
-import org.testng.annotations.AfterTest;
-import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import java.io.ByteArrayInputStream;
@@ -44,13 +42,6 @@
public class SnappyFramedStreamTest
extends AbstractSnappyStreamTest
{
- @BeforeTest
- @AfterTest
- public void resetBufferRecycler()
- {
- BufferRecycler.instance().clear();
- }
-
@Override
protected OutputStream createOutputStream(OutputStream target)
throws IOException
diff --git a/src/test/java/org/iq80/snappy/SnappyStreamTest.java b/src/test/java/org/iq80/snappy/SnappyStreamTest.java
deleted file mode 100644
index 4f26f6d..0000000
--- a/src/test/java/org/iq80/snappy/SnappyStreamTest.java
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (C) 2011 the original author or authors.
- * See the notice.md file distributed with this work for additional
- * information regarding copyright ownership.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.iq80.snappy;
-
-import com.google.common.base.Charsets;
-import org.testng.annotations.Test;
-
-import java.io.ByteArrayInputStream;
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-
-import static com.google.common.io.ByteStreams.toByteArray;
-import static com.google.common.primitives.UnsignedBytes.toInt;
-import static org.iq80.snappy.SnappyOutputStream.STREAM_HEADER;
-import static org.testng.Assert.assertEquals;
-
-public class SnappyStreamTest
- extends AbstractSnappyStreamTest
-{
- @Override
- protected OutputStream createOutputStream(OutputStream target)
- throws IOException
- {
- return new SnappyOutputStream(target);
- }
-
- @Override
- protected InputStream createInputStream(InputStream source, boolean verifyCheckSums)
- throws IOException
- {
- return new SnappyInputStream(source, verifyCheckSums);
- }
-
- @Override
- protected byte[] getMarkerFrame()
- {
- return STREAM_HEADER;
- }
-
- @Test
- public void testSimple()
- throws Exception
- {
- byte[] original = "aaaaaaaaaaaabbbbbbbaaaaaa".getBytes(Charsets.UTF_8);
-
- byte[] compressed = compress(original);
- byte[] uncompressed = uncompress(compressed);
-
- assertEquals(uncompressed, original);
- assertEquals(compressed.length, 33); // 7 byte stream header, 7 byte block header, 19 bytes compressed data
- assertEquals(Arrays.copyOf(compressed, 7), STREAM_HEADER); // stream header
- assertEquals(toInt(compressed[7]), 0x01); // flag: compressed
- assertEquals(toInt(compressed[8]), 0x00); // length: 19 = 0x0013
- assertEquals(toInt(compressed[9]), 0x13);
- assertEquals(toInt(compressed[10]), 0x92); // crc32c: 0x9274cda8
- assertEquals(toInt(compressed[11]), 0x74);
- assertEquals(toInt(compressed[12]), 0xCD);
- assertEquals(toInt(compressed[13]), 0xA8);
- }
-
- @Test
- public void testUncompressable()
- throws Exception
- {
- byte[] random = getRandom(1, 5000);
- int crc32c = Crc32C.maskedCrc32c(random);
-
- byte[] compressed = compress(random);
- byte[] uncompressed = uncompress(compressed);
-
- assertEquals(uncompressed, random);
- assertEquals(compressed.length, random.length + 7 + 7);
- assertEquals(toInt(compressed[7]), 0x00); // flag: uncompressed
- assertEquals(toInt(compressed[8]), 0x13); // length: 5000 = 0x1388
- assertEquals(toInt(compressed[9]), 0x88);
- assertEquals(ByteBuffer.wrap(compressed, 10, 4).getInt(), crc32c); // crc: see above
- }
-
- @Test
- public void testEmptyCompression()
- throws Exception
- {
- byte[] empty = new byte[0];
- assertEquals(compress(empty), STREAM_HEADER);
- assertEquals(uncompress(STREAM_HEADER), empty);
- }
-
- @Test(expectedExceptions = EOFException.class, expectedExceptionsMessageRegExp = ".*block header.*")
- public void testShortBlockHeader()
- throws Exception
- {
- uncompressBlock(new byte[] {0});
- }
-
- @Test(expectedExceptions = EOFException.class, expectedExceptionsMessageRegExp = ".*reading frame.*")
- public void testShortBlockData()
- throws Exception
- {
- uncompressBlock(new byte[] {0, 0, 4, 0, 0, 0, 0, 'x', 'x'}); // flag = 0, size = 4, crc32c = 0, block data = [x, x]
- }
-
- @Test(expectedExceptions = IOException.class, expectedExceptionsMessageRegExp = "invalid compressed flag in header: 0x41")
- public void testInvalidBlockHeaderCompressedFlag()
- throws Exception
- {
- uncompressBlock(new byte[] {'A', 0, 1, 0, 0, 0, 0, 0}); // flag = 'A', block size = 1, crc32c = 0
- }
-
- @Test(expectedExceptions = IOException.class, expectedExceptionsMessageRegExp = "invalid block size in header: 0")
- public void testInvalidBlockSizeZero()
- throws Exception
- {
- uncompressBlock(new byte[] {0, 0, 0, 0, 0, 0, 0}); // flag = '0', block size = 0, crc32c = 0
- }
-
- @Test(expectedExceptions = IOException.class, expectedExceptionsMessageRegExp = "invalid block size in header: 55555")
- public void testInvalidBlockSizeLarge()
- throws Exception
- {
- uncompressBlock(new byte[] {0, (byte) 0xD9, 0x03, 0, 0, 0, 0}); // flag = 0, block size = 55555, crc32c = 0
- }
-
- @Test(expectedExceptions = IOException.class, expectedExceptionsMessageRegExp = "Corrupt input: invalid checksum")
- public void testInvalidChecksum()
- throws Exception
- {
- uncompressBlock(new byte[] {0, 0, 1, 0, 0, 0, 0, 'a'}); // flag = 0, size = 4, crc32c = 0, block data = [a]
- }
-
- @Test
- public void testInvalidChecksumIgnoredWhenVerificationDisabled()
- throws Exception
- {
- byte[] block = {0, 0, 1, 0, 0, 0, 0, 'a'}; // flag = 0, size = 4, crc32c = 0, block data = [a]
- ByteArrayInputStream inputData = new ByteArrayInputStream(blockToStream(block));
- assertEquals(toByteArray(createInputStream(inputData, false)), new byte[] {'a'});
- }
-
- private byte[] uncompressBlock(byte[] block)
- throws IOException
- {
- return uncompress(blockToStream(block));
- }
-
- private static byte[] blockToStream(byte[] block)
- {
- byte[] stream = new byte[STREAM_HEADER.length + block.length];
- System.arraycopy(STREAM_HEADER, 0, stream, 0, STREAM_HEADER.length);
- System.arraycopy(block, 0, stream, STREAM_HEADER.length, block.length);
- return stream;
- }
-}
diff --git a/src/test/java/org/iq80/snappy/SnappyTest.java b/src/test/java/org/iq80/snappy/SnappyTest.java
index c8a49aa..2219240 100644
--- a/src/test/java/org/iq80/snappy/SnappyTest.java
+++ b/src/test/java/org/iq80/snappy/SnappyTest.java
@@ -22,16 +22,15 @@
import org.testng.annotations.Test;
import java.io.File;
-import java.util.Arrays;
import java.util.Random;
public class SnappyTest
{
private static final File TEST_DATA_DIR = new File("testdata");
- private RandomGenerator randomGenerator = new RandomGenerator(0.5);
+ private final RandomGenerator randomGenerator = new RandomGenerator(0.5);
@Test
- public void testByteForByteOutputSyntheticData()
+ public void testNativeCompatibleSyntheticData()
throws Exception
{
for (int i = 1; i < 65 * 1024; i++) {
@@ -39,13 +38,14 @@ public void testByteForByteOutputSyntheticData()
verifyCompression(i);
}
catch (Error e) {
+ e.printStackTrace();
Assert.fail(i + " byte block", e);
}
}
}
@Test
- public void testByteForByteTestData()
+ public void testNativeCompatibleTestData()
throws Exception
{
for (File testFile : getTestFiles()) {
@@ -54,12 +54,36 @@ public void testByteForByteTestData()
verifyCompression(data, 0, data.length);
}
catch (Throwable e) {
+ e.printStackTrace();
Assert.fail("Testdata: " + testFile.getName(), e);
-
}
}
}
+ @Test(expectedExceptions = CorruptionException.class, expectedExceptionsMessageRegExp = "Malformed input: offset=5")
+ public void testInvalidLiteralLength()
+ {
+ byte[] data = {
+ // Encoded uncompressed length 1024
+ -128, 8,
+ // op-code
+ (byte) 252,
+ // Trailer value Integer.MAX_VALUE
+ (byte) 0b1111_1111, (byte) 0b1111_1111, (byte) 0b1111_1111, (byte) 0b0111_1111,
+ // Some arbitrary data
+ 0, 0, 0, 0, 0, 0, 0, 0
+ };
+
+ Snappy.uncompress(data, 0, data.length, new byte[1024], 0, 1024);
+ }
+
+ @Test(expectedExceptions = CorruptionException.class, expectedExceptionsMessageRegExp = "negative compressed length: offset=16")
+ public void testNegativeLength()
+ {
+ byte[] data = {(byte) 255, (byte) 255, (byte) 255, (byte) 255, 0b0000_1000};
+ Snappy.getUncompressedLength(data, 0);
+ }
+
private void verifyCompression(int size)
throws Exception
{
@@ -72,16 +96,8 @@ private void verifyCompression(int size)
private static void verifyCompression(byte[] input, int position, int size)
throws Exception
{
- byte[] nativeCompressed = new byte[org.xerial.snappy.Snappy.maxCompressedLength(size)];
byte[] javaCompressed = new byte[Snappy.maxCompressedLength(size)];
- int nativeCompressedSize = org.xerial.snappy.Snappy.compress(
- input,
- position,
- size,
- nativeCompressed,
- 0);
-
int javaCompressedSize = Snappy.compress(
input,
position,
@@ -89,29 +105,34 @@ private static void verifyCompression(byte[] input, int position, int size)
javaCompressed,
0);
- // verify outputs are exactly the same
- String failureMessage = "Invalid compressed output for input size " + size + " at offset " + position;
- if (!SnappyInternalUtils.equals(javaCompressed, 0, nativeCompressed, 0, nativeCompressedSize)) {
- if (nativeCompressedSize < 100) {
- Assert.assertEquals(
- Arrays.toString(Arrays.copyOf(javaCompressed, nativeCompressedSize)),
- Arrays.toString(Arrays.copyOf(nativeCompressed, nativeCompressedSize)),
- failureMessage
- );
- }
- else {
- Assert.fail(failureMessage);
- }
- }
- Assert.assertEquals(javaCompressedSize, nativeCompressedSize);
-
- // verify the contents can be uncompressed
+ // Verify Java codec decompresses Java compressed data
byte[] uncompressed = new byte[size];
- Snappy.uncompress(javaCompressed, 0, javaCompressedSize, uncompressed, 0);
+ int uncompressedSize = Snappy.uncompress(javaCompressed, 0, javaCompressedSize, uncompressed, 0);
+ Assert.assertEquals(uncompressedSize, size, "Size mismatch");
+ Assert.assertTrue(arraysEqual(input, position, uncompressed, 0, size), "Data mismatch");
- if (!SnappyInternalUtils.equals(uncompressed, 0, input, position, size)) {
- Assert.fail("Invalid uncompressed output for input size " + size + " at offset " + position);
- }
+ // Verify Native codec decompresses Java compressed data
+ byte[] nativeUncompressed = new byte[size];
+ int nativeUncompressedSize = org.xerial.snappy.Snappy.uncompress(
+ javaCompressed,
+ 0,
+ javaCompressedSize,
+ nativeUncompressed,
+ 0);
+ Assert.assertEquals(nativeUncompressedSize, size, "Size mismatch");
+ Assert.assertTrue(arraysEqual(input, position, nativeUncompressed, 0, size), "Data mismatch");
+
+ // Verify Java codec decompresses Native compressed data
+ byte[] nativeCompressed = new byte[org.xerial.snappy.Snappy.maxCompressedLength(size)];
+ int nativeCompressedSize = org.xerial.snappy.Snappy.compress(
+ input,
+ position,
+ size,
+ nativeCompressed,
+ 0);
+ uncompressedSize = Snappy.uncompress(nativeCompressed, 0, nativeCompressedSize, uncompressed, 0);
+ Assert.assertEquals(uncompressedSize, size, "Size mismatch");
+ Assert.assertTrue(arraysEqual(input, position, uncompressed, 0, size), "Data mismatch");
}
public static class RandomGenerator
@@ -177,4 +198,19 @@ static File[] getTestFiles()
 Assert.assertTrue(testFiles != null && testFiles.length > 0, "No test files at " + TEST_DATA_DIR.getAbsolutePath());
 return testFiles;
 }
+
+    /**
+     * Compares {@code length} bytes of {@code left} starting at {@code leftIndex} with the
+     * same number of bytes of {@code right} starting at {@code rightIndex}.
+     *
+     * @return {@code true} when every compared pair of bytes is equal
+     */
+    private static boolean arraysEqual(byte[] left, int leftIndex, byte[] right, int rightIndex, int length)
+    {
+        int matched = 0;
+        while (matched < length && left[leftIndex + matched] == right[rightIndex + matched]) {
+            matched++;
+        }
+        return matched >= length;
+    }
 }