Skip to content

Commit

Permalink
Move postings back to int[] to take advantage of having more lanes pe…
Browse files Browse the repository at this point in the history
…r vector. (#13968)

In Lucene 8.4, we updated postings to work on long[] arrays internally. This
allowed us to work around the lack of explicit vectorization support in the JVM
(auto-vectorization doesn't detect all the scenarios that we would like to
handle), for instance by summing up two integers in one operation.

With explicit vectorization now available, it looks like we can get more
benefits from the ability to compare multiple integers in one operation than
from summing up two integers in one operation. Moving back to ints helps
compare 2x more integers at once vs. longs.
  • Loading branch information
jpountz committed Nov 4, 2024
1 parent 584387a commit 8ae03d6
Show file tree
Hide file tree
Showing 96 changed files with 6,832 additions and 926 deletions.
55 changes: 53 additions & 2 deletions gradle/generation/forUtil.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ configure(project(":lucene:core")) {
description "Regenerate gen_ForUtil.py"
group "generation"

def genDir = file("src/java/org/apache/lucene/codecs/lucene912")
def genDir = file("src/java/org/apache/lucene/codecs/lucene101")
def genScript = file("${genDir}/gen_ForUtil.py")
def genOutput = file("${genDir}/ForUtil.java")

Expand All @@ -48,7 +48,7 @@ configure(project(":lucene:core")) {
description "Regenerate gen_ForDeltaUtil.py"
group "generation"

def genDir = file("src/java/org/apache/lucene/codecs/lucene912")
def genDir = file("src/java/org/apache/lucene/codecs/lucene101")
def genScript = file("${genDir}/gen_ForDeltaUtil.py")
def genOutput = file("${genDir}/ForDeltaUtil.java")

Expand All @@ -68,6 +68,7 @@ configure(project(":lucene:core")) {
andThenTasks: ["spotlessJava", "spotlessJavaApply"],
mustRunBefore: [ "compileJava" ]
])

}

configure(project(":lucene:backward-codecs")) {
Expand Down Expand Up @@ -146,5 +147,55 @@ configure(project(":lucene:backward-codecs")) {
andThenTasks: ["spotlessJava", "spotlessJavaApply"],
mustRunBefore: [ "compileJava" ]
])

// Runs gen_ForUtil.py with python3 from the backward_codecs lucene912 source
// directory; ForUtil.java in that same directory is the declared output.
task generateForUtil912Internal() {
description "Regenerate gen_ForUtil.py"
group "generation"

// The generator script and the file it is expected to produce live side by side.
def genDir = file("src/java/org/apache/lucene/backward_codecs/lucene912")
def genScript = file("${genDir}/gen_ForUtil.py")
def genOutput = file("${genDir}/ForUtil.java")

// Register the script as the task input and the Java file as the task output.
inputs.file genScript
outputs.file genOutput

doLast {
quietExec {
// Run from genDir — presumably the script writes its output relative to the
// working directory; verify against gen_ForUtil.py.
workingDir genDir
executable project.externalTool("python3")
// -B tells Python not to write .pyc bytecode files.
args = [ '-B', genScript ]
}
}
}

regenerate.dependsOn wrapWithPersistentChecksums(generateForUtil912Internal, [
andThenTasks: ["spotlessJava", "spotlessJavaApply"],
mustRunBefore: [ "compileJava" ]
])

// Runs gen_ForDeltaUtil.py with python3 from the backward_codecs lucene912 source
// directory; ForDeltaUtil.java in that same directory is the declared output.
task generateForDeltaUtil912Internal() {
description "Regenerate gen_ForDeltaUtil.py"
group "generation"

// The generator script and the file it is expected to produce live side by side.
def genDir = file("src/java/org/apache/lucene/backward_codecs/lucene912")
def genScript = file("${genDir}/gen_ForDeltaUtil.py")
def genOutput = file("${genDir}/ForDeltaUtil.java")

// Register the script as the task input and the Java file as the task output.
inputs.file genScript
outputs.file genOutput

doLast {
quietExec {
// Run from genDir — presumably the script writes its output relative to the
// working directory; verify against gen_ForDeltaUtil.py.
workingDir genDir
executable project.externalTool("python3")
// -B tells Python not to write .pyc bytecode files.
args = [ '-B', genScript ]
}
}
}

regenerate.dependsOn wrapWithPersistentChecksums(generateForDeltaUtil912Internal, [
andThenTasks: ["spotlessJava", "spotlessJavaApply"],
mustRunBefore: [ "compileJava" ]
])
}

5 changes: 5 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ Optimizations

* GITHUB#13763: Replace Map<String,Object> with IntObjectHashMap for KnnVectorsReader (Pan Guixin)

* GITHUB#13968: Switch postings from storing doc IDs in a long[] to an int[].
Lucene 8.4 had moved to a long[] to help speed up block decoding by using
longs that would pack two integers. We are now moving back to integers to be
able to take advantage of 2x more lanes with the vector API. (Adrien Grand)

Bug Fixes
---------------------
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene912/ForDeltaUtil.java": "b81961f0b277b1458ca259e0d23ccc4eeeb47fe7",
"lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene912/gen_ForDeltaUtil.py": "3191d7591309b7876c5c709fb9375af5b87c2ef8"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene912/ForUtil.java": "e6db3c665dfebca8b93eb6b4651d2eb3af637b02",
"lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene912/gen_ForUtil.py": "993ecc9cf7ea821963384070669695257b16e040"
}
7 changes: 5 additions & 2 deletions lucene/backward-codecs/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
exports org.apache.lucene.backward_codecs.lucene95;
exports org.apache.lucene.backward_codecs.lucene99;
exports org.apache.lucene.backward_codecs.lucene912;
exports org.apache.lucene.backward_codecs.lucene100;
exports org.apache.lucene.backward_codecs.packed;
exports org.apache.lucene.backward_codecs.store;

Expand All @@ -46,7 +47,8 @@
org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat,
org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat,
org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat,
org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat;
org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat,
org.apache.lucene.backward_codecs.lucene912.Lucene912PostingsFormat;
provides org.apache.lucene.codecs.KnnVectorsFormat with
org.apache.lucene.backward_codecs.lucene90.Lucene90HnswVectorsFormat,
org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsFormat,
Expand All @@ -64,5 +66,6 @@
org.apache.lucene.backward_codecs.lucene94.Lucene94Codec,
org.apache.lucene.backward_codecs.lucene95.Lucene95Codec,
org.apache.lucene.backward_codecs.lucene99.Lucene99Codec,
org.apache.lucene.backward_codecs.lucene912.Lucene912Codec;
org.apache.lucene.backward_codecs.lucene912.Lucene912Codec,
org.apache.lucene.backward_codecs.lucene100.Lucene100Codec;
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene100;
package org.apache.lucene.backward_codecs.lucene100;

import java.util.Objects;
import org.apache.lucene.backward_codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.DocValuesFormat;
Expand All @@ -37,7 +38,6 @@
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
import org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.lucene94.Lucene94FieldInfosFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99SegmentInfoFormat;
Expand All @@ -50,7 +50,7 @@
*
* <p>If you want to reuse functionality of this codec in another codec, extend {@link FilterCodec}.
*
* @see org.apache.lucene.codecs.lucene100 package documentation for file format details.
* @see org.apache.lucene.backward_codecs.lucene100 package documentation for file format details.
* @lucene.experimental
*/
public class Lucene100Codec extends Codec {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@
* limitations under the License.
*/

/** Lucene 9.12 file format. */
package org.apache.lucene.codecs.lucene912;
/** Lucene 10.0 file format. */
package org.apache.lucene.backward_codecs.lucene100;
Loading

0 comments on commit 8ae03d6

Please sign in to comment.