-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Introduce raw fwd index version V5 containing implicit num doc length, improving space efficiency #14105
Introduce raw fwd index version V5 containing implicit num doc length, improving space efficiency #14105
Changes from 54 commits
84987ed
d654fd9
3d4b99b
8c967b5
c2359ec
dd3410f
0c0df84
e7e091b
153be16
0233905
69defe1
e1173c0
34ac786
b090676
2ff1914
54b2709
318b826
a9170b7
d699c2a
7137792
6452c79
9bfbd22
fac46c5
29a9fdb
8abf7fe
1c877c6
ce6870b
0d71f91
736f23f
f2faece
01cbf56
f8c2f24
a6ca351
61fee18
500dae6
427bdb5
0cb705d
1617ebd
813d360
7a565e4
296959d
0d7073b
427af58
6e8c3ae
52976a7
9bb453a
93d3100
79e91e9
9a7676f
aa9eb74
4ce5280
0d88f2f
ec1b628
87dd327
e7f645c
5cb4575
1ece331
4c35683
bd1da13
2637e2d
731906e
171aaf4
bca5eda
ce5eb7b
7274f4c
ea29a13
256d774
acfe864
a4751b6
32062a1
ef3f663
38a8cb6
b8dfacd
bd9bdee
5b6e29e
597762f
ff21345
cde2a6d
d430d61
3f68f75
27328bf
79c6f66
e9778d3
b43f676
c6033b9
3f654e4
12af1ce
063c5b4
b8794f2
9812e3e
085aed6
7e1d10c
d06dda4
e9835c5
06c0b95
07d6f75
680fc24
1b22234
cfbc9ee
0da0ca7
89ec8af
592967a
44b0df8
6fe4517
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
/** | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.pinot.segment.local.io.writer.impl; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import javax.annotation.concurrent.NotThreadSafe; | ||
import org.apache.pinot.segment.local.utils.ArraySerDeUtils; | ||
import org.apache.pinot.segment.spi.compression.ChunkCompressionType; | ||
|
||
|
||
/** | ||
* Forward index writer that extends {@link VarByteChunkForwardIndexWriterV4} with two differences: the | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is not the only difference. Let's also document the value format difference. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done |
||
* version tag is bumped from 4 to 5, and int, long, float and double MV values are serialized with the {@code ArraySerDeUtils.serialize*ArrayWithoutLength} methods, i.e. without an explicitly stored length. | ||
* | ||
* <p>The {@code VERSION} tag is a {@code static final} class variable set to {@code 5}. Because static fields are | ||
* hidden rather than overridden in a child class, and are bound to the class that declares them, care must be taken | ||
* to ensure that the parent class can correctly observe the child class's {@code VERSION} value at runtime.</p> | ||
* | ||
* <p>To achieve this, the {@code getVersion()} method is overridden to return the concrete subclass's | ||
* {@code VERSION} value, ensuring that the correct version number is returned even when using a reference | ||
* to the parent class.</p> | ||
* | ||
* @see VarByteChunkForwardIndexWriterV4 | ||
* @see VarByteChunkForwardIndexWriterV5#getVersion() | ||
*/ | ||
@NotThreadSafe | ||
public class VarByteChunkForwardIndexWriterV5 extends VarByteChunkForwardIndexWriterV4 { | ||
public static final int VERSION = 5; | ||
|
||
public VarByteChunkForwardIndexWriterV5(File file, ChunkCompressionType compressionType, int chunkSize) | ||
throws IOException { | ||
super(file, compressionType, chunkSize); | ||
} | ||
|
||
@Override | ||
public int getVersion() { | ||
return VERSION; | ||
} | ||
|
||
@Override | ||
public void putIntMV(int[] values) { | ||
putBytes(ArraySerDeUtils.serializeIntArrayWithoutLength(values)); | ||
} | ||
|
||
@Override | ||
public void putLongMV(long[] values) { | ||
putBytes(ArraySerDeUtils.serializeLongArrayWithoutLength(values)); | ||
} | ||
|
||
@Override | ||
public void putFloatMV(float[] values) { | ||
putBytes(ArraySerDeUtils.serializeFloatArrayWithoutLength(values)); | ||
} | ||
|
||
@Override | ||
public void putDoubleMV(double[] values) { | ||
putBytes(ArraySerDeUtils.serializeDoubleArrayWithoutLength(values)); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -67,8 +67,7 @@ public class VarByteChunkForwardIndexReaderV4 | |
|
||
public VarByteChunkForwardIndexReaderV4(PinotDataBuffer dataBuffer, FieldSpec.DataType storedType, | ||
boolean isSingleValue) { | ||
int version = dataBuffer.getInt(0); | ||
Preconditions.checkState(version == VarByteChunkForwardIndexWriterV4.VERSION, "Illegal index version: %s", version); | ||
validateIndexVersion(dataBuffer); | ||
jackluo923 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
_storedType = storedType; | ||
_targetDecompressedChunkSize = dataBuffer.getInt(4); | ||
_chunkCompressionType = ChunkCompressionType.valueOf(dataBuffer.getInt(8)); | ||
|
@@ -81,6 +80,11 @@ public VarByteChunkForwardIndexReaderV4(PinotDataBuffer dataBuffer, FieldSpec.Da | |
_isSingleValue = isSingleValue; | ||
} | ||
|
||
/**
 * Checks the index version stored as the int at offset 0 of the given buffer.
 * The constructor of this reader calls this method before it reads the remaining header fields, and
 * {@code VarByteChunkForwardIndexReaderV5} overrides it to accept its own version instead.
 *
 * @param dataBuffer buffer holding the forward index; only the int at offset 0 is read here
 * @throws IllegalStateException if the stored version differs from {@code VarByteChunkForwardIndexWriterV4.VERSION}
 */
public void validateIndexVersion(PinotDataBuffer dataBuffer) { | ||
jackluo923 marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I meant we can add a |
||
int version = dataBuffer.getInt(0); | ||
Preconditions.checkState(version == VarByteChunkForwardIndexWriterV4.VERSION, "Illegal index version: %s", version); | ||
} | ||
|
||
@Override | ||
public boolean isDictionaryEncoded() { | ||
return false; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
/** | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.pinot.segment.local.segment.index.readers.forward; | ||
|
||
import com.google.common.base.Preconditions; | ||
import org.apache.pinot.segment.local.io.writer.impl.VarByteChunkForwardIndexWriterV4; | ||
import org.apache.pinot.segment.local.io.writer.impl.VarByteChunkForwardIndexWriterV5; | ||
import org.apache.pinot.segment.local.utils.ArraySerDeUtils; | ||
import org.apache.pinot.segment.spi.memory.PinotDataBuffer; | ||
import org.apache.pinot.spi.data.FieldSpec; | ||
|
||
|
||
/** | ||
* Chunk-based raw (non-dictionary-encoded) forward index reader for values of SV variable length data types | ||
* (BIG_DECIMAL, STRING, BYTES), MV fixed length and MV variable length data types. | ||
* <p>For data layout, please refer to the documentation for {@link VarByteChunkForwardIndexWriterV4} | ||
*/ | ||
public class VarByteChunkForwardIndexReaderV5 extends VarByteChunkForwardIndexReaderV4 { | ||
public VarByteChunkForwardIndexReaderV5(PinotDataBuffer dataBuffer, FieldSpec.DataType storedType, | ||
boolean isSingleValue) { | ||
super(dataBuffer, storedType, isSingleValue); | ||
} | ||
|
||
@Override | ||
public void validateIndexVersion(PinotDataBuffer dataBuffer) { | ||
int version = dataBuffer.getInt(0); | ||
Preconditions.checkState(version == VarByteChunkForwardIndexWriterV5.VERSION, "Illegal index version: %s", version); | ||
} | ||
|
||
@Override | ||
public int getIntMV(int docId, int[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { | ||
return ArraySerDeUtils.deserializeIntArrayWithoutLength(context.getValue(docId), valueBuffer); | ||
} | ||
|
||
@Override | ||
public int[] getIntMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { | ||
return ArraySerDeUtils.deserializeIntArrayWithoutLength(context.getValue(docId)); | ||
} | ||
|
||
@Override | ||
public int getLongMV(int docId, long[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { | ||
return ArraySerDeUtils.deserializeLongArrayWithoutLength(context.getValue(docId), valueBuffer); | ||
} | ||
|
||
@Override | ||
public long[] getLongMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { | ||
return ArraySerDeUtils.deserializeLongArrayWithoutLength(context.getValue(docId)); | ||
} | ||
|
||
@Override | ||
public int getFloatMV(int docId, float[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { | ||
return ArraySerDeUtils.deserializeFloatArrayWithoutLength(context.getValue(docId), valueBuffer); | ||
} | ||
|
||
@Override | ||
public float[] getFloatMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { | ||
return ArraySerDeUtils.deserializeFloatArrayWithoutLength(context.getValue(docId)); | ||
} | ||
|
||
@Override | ||
public int getDoubleMV(int docId, double[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { | ||
return ArraySerDeUtils.deserializeDoubleArrayWithoutLength(context.getValue(docId), valueBuffer); | ||
} | ||
|
||
@Override | ||
public double[] getDoubleMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { | ||
return ArraySerDeUtils.deserializeDoubleArrayWithoutLength(context.getValue(docId)); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.