Resolved merge conflict in changelog
bugmakerrrrrr committed Oct 30, 2024
2 parents f25c138 + 3041af7 commit 43238d1
Showing 44 changed files with 673 additions and 536 deletions.
13 changes: 13 additions & 0 deletions lucene/CHANGES.txt
@@ -38,6 +38,8 @@ API Changes

* GITHUB#13950: Make BooleanQuery#getClauses public and add #add(Collection<BooleanClause>) to BQ builder. (Shubham Chaudhary)

* GITHUB#13957: Removed the LeafSimScorer class to save its overhead. Scorers now
  compute scores directly from a SimScorer, postings, and norms. (Adrien Grand)

New Features
---------------------
@@ -70,6 +72,16 @@ Optimizations

* GITHUB#13899: Check ahead if we can get the count. (Lu Xugang)

* GITHUB#13943: Removed the shared `HitsThresholdChecker`, which reduces overhead
  but may slightly delay the point at which dynamic pruning kicks in. (Adrien Grand)

* GITHUB#13961: Replace Map<String,Object> with IntObjectHashMap for DV producer. (Pan Guixin)

* GITHUB#13963: Speed up nextDoc() implementations in Lucene912PostingsReader.
(Adrien Grand)

* GITHUB#13958: Speed up advancing within a block. (Adrien Grand)

* GITHUB#13763: Replace Map<String,Object> with IntObjectHashMap for KnnVectorsReader. (Pan Guixin)

Bug Fixes
@@ -79,6 +91,7 @@ Bug Fixes
* GITHUB#13884: Remove broken .toArray from Long/CharObjectHashMap entirely. (Pan Guixin)
* GITHUB#12686: Added support for highlighting IndexOrDocValuesQuery. (Prudhvi Godithi)
* GITHUB#13927: Fix StoredFieldsConsumer finish. (linfn)
* GITHUB#13944: Ensure deterministic order of clauses for `DisjunctionMaxQuery#toString`. (Laurent Jakubina)

Build
---------------------
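For context on the GITHUB#13950 entry above, here is a minimal, hypothetical sketch of how the new BooleanQuery.Builder#add(Collection<BooleanClause>) overload might be used. The field names and queries are illustrative only, and the chained call assumes the new overload returns the builder like the existing add methods do.

import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

// Hypothetical usage sketch for GITHUB#13950: copy all clauses of an existing
// BooleanQuery into a new builder in a single call.
public class BulkAddClausesSketch {
  public static void main(String[] args) {
    BooleanQuery original =
        new BooleanQuery.Builder()
            .add(new TermQuery(new Term("body", "lucene")), BooleanClause.Occur.MUST)
            .add(new TermQuery(new Term("body", "fast")), BooleanClause.Occur.SHOULD)
            .build();

    // clauses() is a long-standing accessor; add(Collection<BooleanClause>) is the new overload.
    List<BooleanClause> clauses = original.clauses();
    BooleanQuery copy = new BooleanQuery.Builder().add(clauses).build();
    System.out.println(copy);
  }
}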
lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java
@@ -17,8 +17,6 @@
package org.apache.lucene.backward_codecs.lucene80;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.backward_codecs.packed.LegacyDirectMonotonicReader;
import org.apache.lucene.backward_codecs.packed.LegacyDirectReader;
import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
@@ -41,6 +39,7 @@
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.internal.hppc.IntObjectHashMap;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
@@ -53,11 +52,11 @@

/** reader for {@link Lucene80DocValuesFormat} */
final class Lucene80DocValuesProducer extends DocValuesProducer {
private final Map<String, NumericEntry> numerics = new HashMap<>();
private final Map<String, BinaryEntry> binaries = new HashMap<>();
private final Map<String, SortedEntry> sorted = new HashMap<>();
private final Map<String, SortedSetEntry> sortedSets = new HashMap<>();
private final Map<String, SortedNumericEntry> sortedNumerics = new HashMap<>();
private final IntObjectHashMap<NumericEntry> numerics = new IntObjectHashMap<>();
private final IntObjectHashMap<BinaryEntry> binaries = new IntObjectHashMap<>();
private final IntObjectHashMap<SortedEntry> sorted = new IntObjectHashMap<>();
private final IntObjectHashMap<SortedSetEntry> sortedSets = new IntObjectHashMap<>();
private final IntObjectHashMap<SortedNumericEntry> sortedNumerics = new IntObjectHashMap<>();
private final IndexInput data;
private final int maxDoc;
private int version = -1;
@@ -139,7 +138,7 @@ private void readFields(String segmentName, IndexInput meta, FieldInfos infos)
}
byte type = meta.readByte();
if (type == Lucene80DocValuesFormat.NUMERIC) {
numerics.put(info.name, readNumeric(meta));
numerics.put(info.number, readNumeric(meta));
} else if (type == Lucene80DocValuesFormat.BINARY) {
final boolean compressed;
if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) {
@@ -158,13 +157,13 @@ private void readFields(String segmentName, IndexInput meta, FieldInfos infos)
} else {
compressed = version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED;
}
binaries.put(info.name, readBinary(meta, compressed));
binaries.put(info.number, readBinary(meta, compressed));
} else if (type == Lucene80DocValuesFormat.SORTED) {
sorted.put(info.name, readSorted(meta));
sorted.put(info.number, readSorted(meta));
} else if (type == Lucene80DocValuesFormat.SORTED_SET) {
sortedSets.put(info.name, readSortedSet(meta));
sortedSets.put(info.number, readSortedSet(meta));
} else if (type == Lucene80DocValuesFormat.SORTED_NUMERIC) {
sortedNumerics.put(info.name, readSortedNumeric(meta));
sortedNumerics.put(info.number, readSortedNumeric(meta));
} else {
throw new CorruptIndexException("invalid type: " + type, meta);
}
@@ -426,7 +425,7 @@ private static class SortedNumericEntry extends NumericEntry {

@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
NumericEntry entry = numerics.get(field.name);
NumericEntry entry = numerics.get(field.number);
return getNumeric(entry);
}

@@ -915,7 +914,7 @@ BytesRef decode(int docNumber) throws IOException {

@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
BinaryEntry entry = binaries.get(field.name);
BinaryEntry entry = binaries.get(field.number);
if (entry.compressed) {
return getCompressedBinary(entry);
} else {
@@ -973,7 +972,7 @@ public BytesRef binaryValue() throws IOException {

@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
SortedEntry entry = sorted.get(field.name);
SortedEntry entry = sorted.get(field.number);
return getSorted(entry);
}

@@ -1407,7 +1406,7 @@ public int docFreq() throws IOException {

@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
SortedNumericEntry entry = sortedNumerics.get(field.name);
SortedNumericEntry entry = sortedNumerics.get(field.number);
if (entry.numValues == entry.numDocsWithField) {
return DocValues.singleton(getNumeric(entry));
}
@@ -1543,7 +1542,7 @@ private void set() {

@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
SortedSetEntry entry = sortedSets.get(field.name);
SortedSetEntry entry = sortedSets.get(field.number);
if (entry.singleValueEntry != null) {
return DocValues.singleton(getSorted(entry.singleValueEntry));
}
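The Lucene80DocValuesProducer changes above replace String-keyed HashMaps with the int-keyed IntObjectHashMap from Lucene's internal hppc fork, looking entries up by the per-segment field number rather than the field name. A minimal sketch of that pattern follows, with a made-up Entry type standing in for NumericEntry, BinaryEntry, and the other real entry classes.

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.internal.hppc.IntObjectHashMap;

// Sketch of the keying change: field numbers are small ints, so an int-keyed map
// avoids String hashing and per-entry boxing compared to HashMap<String, Entry>.
final class FieldEntryLookupSketch {
  // Placeholder for the producer's real entry classes (NumericEntry, BinaryEntry, ...).
  static final class Entry {}

  private final IntObjectHashMap<Entry> entries = new IntObjectHashMap<>();

  void register(FieldInfo info, Entry entry) {
    entries.put(info.number, entry); // previously: map.put(info.name, entry)
  }

  Entry lookup(FieldInfo field) {
    return entries.get(field.number); // previously: map.get(field.name)
  }
}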
lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/AdvanceBenchmark.java
@@ -0,0 +1,180 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.benchmark.jmh;

import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.VectorUtil;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.CompilerControl;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;

@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
@Warmup(iterations = 5, time = 1)
@Measurement(iterations = 5, time = 1)
@Fork(
value = 3,
jvmArgsAppend = {
"-Xmx1g",
"-Xms1g",
"-XX:+AlwaysPreTouch",
"--add-modules",
"jdk.incubator.vector"
})
public class AdvanceBenchmark {

private final long[] values = new long[129];
private final int[] startIndexes = new int[1_000];
private final long[] targets = new long[startIndexes.length];

@Setup(Level.Trial)
public void setup() throws Exception {
for (int i = 0; i < 128; ++i) {
values[i] = i;
}
values[128] = DocIdSetIterator.NO_MORE_DOCS;
Random r = new Random(0);
for (int i = 0; i < startIndexes.length; ++i) {
startIndexes[i] = r.nextInt(64);
targets[i] = startIndexes[i] + 1 + r.nextInt(1 << r.nextInt(7));
}
}

@Benchmark
public void binarySearch() {
for (int i = 0; i < startIndexes.length; ++i) {
binarySearch(values, targets[i], startIndexes[i]);
}
}

@CompilerControl(CompilerControl.Mode.DONT_INLINE)
private static int binarySearch(long[] values, long target, int startIndex) {
// Standard binary search
int i = Arrays.binarySearch(values, startIndex, values.length, target);
if (i < 0) {
i = -1 - i;
}
return i;
}

@Benchmark
public void inlinedBranchlessBinarySearch() {
for (int i = 0; i < targets.length; ++i) {
inlinedBranchlessBinarySearch(values, targets[i]);
}
}

@CompilerControl(CompilerControl.Mode.DONT_INLINE)
private static int inlinedBranchlessBinarySearch(long[] values, long target) {
// This compiles to cmov instructions.
int start = 0;

if (values[63] < target) {
start += 64;
}
if (values[start + 31] < target) {
start += 32;
}
if (values[start + 15] < target) {
start += 16;
}
if (values[start + 7] < target) {
start += 8;
}
if (values[start + 3] < target) {
start += 4;
}
if (values[start + 1] < target) {
start += 2;
}
if (values[start] < target) {
start += 1;
}

return start;
}

@Benchmark
public void linearSearch() {
for (int i = 0; i < startIndexes.length; ++i) {
linearSearch(values, targets[i], startIndexes[i]);
}
}

@CompilerControl(CompilerControl.Mode.DONT_INLINE)
private static int linearSearch(long[] values, long target, int startIndex) {
// Naive linear search.
for (int i = startIndex; i < values.length; ++i) {
if (values[i] >= target) {
return i;
}
}
return values.length;
}

@Benchmark
public void vectorUtilSearch() {
for (int i = 0; i < startIndexes.length; ++i) {
VectorUtil.findNextGEQ(values, 128, targets[i], startIndexes[i]);
}
}

@CompilerControl(CompilerControl.Mode.DONT_INLINE)
private static int vectorUtilSearch(long[] values, long target, int startIndex) {
return VectorUtil.findNextGEQ(values, 128, target, startIndex);
}

private static void assertEquals(int expected, int actual) {
if (expected != actual) {
throw new AssertionError("Expected: " + expected + ", got " + actual);
}
}

public static void main(String[] args) {
// For testing purposes
long[] values = new long[129];
for (int i = 0; i < 128; ++i) {
values[i] = i;
}
values[128] = DocIdSetIterator.NO_MORE_DOCS;
for (int start = 0; start < 128; ++start) {
for (int targetIndex = start; targetIndex < 128; ++targetIndex) {
int actualIndex = binarySearch(values, values[targetIndex], start);
assertEquals(targetIndex, actualIndex);
actualIndex = inlinedBranchlessBinarySearch(values, values[targetIndex]);
assertEquals(targetIndex, actualIndex);
actualIndex = linearSearch(values, values[targetIndex], start);
assertEquals(targetIndex, actualIndex);
actualIndex = vectorUtilSearch(values, values[targetIndex], start);
assertEquals(targetIndex, actualIndex);
}
}
}
}
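AdvanceBenchmark above compares strategies for one primitive operation behind GITHUB#13958: given a block of ascending doc IDs, find the first index whose value is greater than or equal to a target. Below is a simplified illustration of how such a helper can drive advancing within a block; it is a sketch of the idea, not the actual Lucene912PostingsReader code, and the class and method names are made up.

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.VectorUtil;

// Simplified illustration of advancing within a single 128-entry block of ascending doc IDs.
final class BlockAdvanceSketch {
  private final long[] docBuffer = new long[129]; // 128 doc IDs plus one sentinel slot
  private int docBufferUpto;

  BlockAdvanceSketch(long[] blockDocs) {
    System.arraycopy(blockDocs, 0, docBuffer, 0, 128);
    docBuffer[128] = DocIdSetIterator.NO_MORE_DOCS; // sentinel, mirroring the benchmark's setup
  }

  long advanceWithinBlock(long target) {
    // First index at or after docBufferUpto whose value is >= target (assumed to be
    // the sentinel slot when the target exceeds every doc ID in the block).
    docBufferUpto = VectorUtil.findNextGEQ(docBuffer, 128, target, docBufferUpto);
    return docBuffer[docBufferUpto];
  }
}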