Add new dynamic confidence interval configuration to scalar quantized format (#13445)

When int4 scalar quantization was merged, it added a new way to dynamically calculate quantiles.

However, that change inadvertently altered the default behavior: a null confidenceInterval would calculate the dynamic quantiles instead of the previous automatic setting of 1 - 1/(dim + 1).

This commit formalizes the dynamic quantile calculation by setting the confidenceInterval to 0, and preserves the previous behavior for null confidenceIntervals so that users upgrading will not see different quantiles than they expect.
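To make the difference concrete, here is a minimal sketch of the three configurations (the `Lucene99ScalarQuantizedVectorsFormat(Float, int, boolean)` constructor and the `DYNAMIC_CONFIDENCE_INTERVAL` constant are shown in the diff below; the 768-dimension figure in the comment is only an illustrative example):

```java
import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat;

public class ConfidenceIntervalOptions {
  public static void main(String[] args) {
    // null: the pre-existing default is preserved, confidenceInterval = 1 - 1/(dim + 1);
    // for example, 768-dimensional vectors give 1 - 1/769 ≈ 0.9987
    Lucene99ScalarQuantizedVectorsFormat byDimension =
        new Lucene99ScalarQuantizedVectorsFormat(null, 7, false);

    // 0: new in this change, quantiles are determined dynamically by sampling several
    // candidate confidence intervals and keeping the most accurate quantile pair
    Lucene99ScalarQuantizedVectorsFormat dynamic =
        new Lucene99ScalarQuantizedVectorsFormat(
            Lucene99ScalarQuantizedVectorsFormat.DYNAMIC_CONFIDENCE_INTERVAL, 7, false);

    // any value in the accepted [0.9, 1.0] range: use that fixed confidence interval as before
    Lucene99ScalarQuantizedVectorsFormat fixed =
        new Lucene99ScalarQuantizedVectorsFormat(0.95f, 7, false);
  }
}
```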
benwtrent committed Jun 1, 2024
1 parent 1e660ee commit 84ff736
Showing 8 changed files with 172 additions and 29 deletions.
4 changes: 4 additions & 0 deletions lucene/CHANGES.txt
@@ -72,6 +72,10 @@ New Features

* GITHUB#13414: Counts are always available in the result when using taxonomy facets. (Stefan Vodita)

* GITHUB#13445: Add a new option for calculating scalar quantiles. Setting `confidenceInterval` to
`0` will now dynamically determine the quantiles through a grid search over candidate quantile pairs
calculated from multiple confidence intervals. (Ben Trent)

Improvements
---------------------

@@ -93,7 +93,9 @@ public Lucene99HnswScalarQuantizedVectorsFormat(int maxConn, int beamWidth) {
* lte 4 bits will be compressed into a single byte. If false, the vectors will be stored as
* is. This provides a trade-off of memory usage and speed.
* @param confidenceInterval the confidenceInterval for scalar quantizing the vectors, when `null`
* it is calculated based on the vector field dimensions.
* it is calculated based on the vector field dimensions. When `0`, the quantiles are
* dynamically determined by sampling many confidence intervals and determining the most
* accurate pair.
* @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
* generated by this format to do the merge
*/
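For context, a hedged sketch of how this format is typically plugged in per field. The full seven-argument constructor order used here (maxConn, beamWidth, numMergeWorkers, bits, compress, confidenceInterval, mergeExec) is inferred from the test change further down rather than shown in this hunk, so treat it as an assumption:

```java
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat;
import org.apache.lucene.index.IndexWriterConfig;

public class QuantizedCodecExample {
  public static IndexWriterConfig quantizedWriterConfig() {
    IndexWriterConfig config = new IndexWriterConfig();
    config.setCodec(
        new Lucene99Codec() {
          @Override
          public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
            // assumed argument order: maxConn=16, beamWidth=100, one merge worker,
            // 7-bit quantization, no compression, dynamic confidence interval (0f),
            // and no merge executor
            return new Lucene99HnswScalarQuantizedVectorsFormat(16, 100, 1, 7, false, 0f, null);
          }
        });
    return config;
  }
}
```

Passing `0f` selects the new dynamic quantile calculation, while `null` keeps the dimension-based default described above.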
@@ -57,6 +57,9 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
/** The maximum confidence interval */
private static final float MAXIMUM_CONFIDENCE_INTERVAL = 1f;

/** Dynamic confidence interval */
public static final float DYNAMIC_CONFIDENCE_INTERVAL = 0f;

/**
* Controls the confidence interval used to scalar quantize the vectors; the default value is
* calculated as `1-1/(vector_dimensions + 1)`
@@ -76,7 +79,8 @@ public Lucene99ScalarQuantizedVectorsFormat() {
* Constructs a format using the given graph construction parameters.
*
* @param confidenceInterval the confidenceInterval for scalar quantizing the vectors, when `null`
* it is calculated dynamically.
* it is calculated based on the vector dimension. When `0`, the quantiles are dynamically
* determined by sampling many confidence intervals and determining the most accurate pair.
* @param bits the number of bits to use for scalar quantization (must be between 1 and 8,
* inclusive)
* @param compress whether to compress the vectors, if true, the vectors that are quantized with
@@ -86,13 +90,15 @@ public Lucene99ScalarQuantizedVectorsFormat() {
public Lucene99ScalarQuantizedVectorsFormat(
Float confidenceInterval, int bits, boolean compress) {
if (confidenceInterval != null
&& confidenceInterval != DYNAMIC_CONFIDENCE_INTERVAL
&& (confidenceInterval < MINIMUM_CONFIDENCE_INTERVAL
|| confidenceInterval > MAXIMUM_CONFIDENCE_INTERVAL)) {
throw new IllegalArgumentException(
"confidenceInterval must be between "
+ MINIMUM_CONFIDENCE_INTERVAL
+ " and "
+ MAXIMUM_CONFIDENCE_INTERVAL
+ " or 0"
+ "; confidenceInterval="
+ confidenceInterval);
}
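The constructor above now accepts `0` as a sentinel for the dynamic mode described in the javadoc: sample several confidence intervals and keep the quantile pair that quantizes most accurately. Purely as an illustration of that idea (this is not Lucene's actual `ScalarQuantizer.fromVectorsAutoInterval` implementation; the candidate grid and error measure below are invented for the sketch), a dynamic selection could look like this:

```java
import java.util.Arrays;
import java.util.List;

final class DynamicIntervalSketch {

  /** Returns the {lower, upper} quantile pair with the smallest quantization round-trip error. */
  static float[] pickQuantiles(List<float[]> vectors, int bits) {
    float[] candidateIntervals = {0.9f, 0.95f, 0.99f, 0.999f}; // hypothetical grid
    float bestError = Float.POSITIVE_INFINITY;
    float[] best = null;
    for (float interval : candidateIntervals) {
      float[] quantiles = quantilesForInterval(vectors, interval);
      float error = quantizationError(vectors, quantiles[0], quantiles[1], bits);
      if (error < bestError) {
        bestError = error;
        best = quantiles;
      }
    }
    return best;
  }

  /** Central interval of the component distribution, e.g. interval=0.9 keeps the middle 90%. */
  private static float[] quantilesForInterval(List<float[]> vectors, float interval) {
    int dim = vectors.get(0).length;
    float[] all = new float[vectors.size() * dim];
    int i = 0;
    for (float[] v : vectors) {
      for (float x : v) {
        all[i++] = x;
      }
    }
    Arrays.sort(all);
    int skip = (int) ((1f - interval) / 2f * all.length);
    return new float[] {all[skip], all[all.length - 1 - skip]};
  }

  /** Mean squared error after quantizing and de-quantizing every vector component. */
  private static float quantizationError(List<float[]> vectors, float lower, float upper, int bits) {
    int levels = (1 << bits) - 1;
    float scale = (upper - lower) / levels;
    if (scale == 0f) {
      return 0f; // degenerate distribution, quantization loses nothing
    }
    double error = 0;
    long count = 0;
    for (float[] v : vectors) {
      for (float x : v) {
        float clamped = Math.min(upper, Math.max(lower, x));
        int bucket = Math.round((clamped - lower) / scale);
        float dequantized = lower + bucket * scale;
        error += (x - dequantized) * (x - dequantized);
        count++;
      }
    }
    return (float) (error / count);
  }
}
```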
@@ -346,12 +346,19 @@ private static class FieldEntry implements Accountable {
if (size > 0) {
if (versionMeta < Lucene99ScalarQuantizedVectorsFormat.VERSION_ADD_BITS) {
int floatBits = input.readInt(); // confidenceInterval, unused
if (floatBits == -1) {
if (floatBits == -1) { // indicates a null confidence interval
throw new CorruptIndexException(
"Missing confidence interval for scalar quantizer", input);
}
this.bits = (byte) 7;
this.compress = false;
float confidenceInterval = Float.intBitsToFloat(floatBits);
// indicates a dynamic interval, which shouldn't be provided in this version
if (confidenceInterval
== Lucene99ScalarQuantizedVectorsFormat.DYNAMIC_CONFIDENCE_INTERVAL) {
throw new CorruptIndexException(
"Invalid confidence interval for scalar quantizer: " + confidenceInterval, input);
}
bits = (byte) 7;
compress = false;
float minQuantile = Float.intBitsToFloat(input.readInt());
float maxQuantile = Float.intBitsToFloat(input.readInt());
scalarQuantizer = new ScalarQuantizer(minQuantile, maxQuantile, (byte) 7);
@@ -18,6 +18,7 @@
package org.apache.lucene.codecs.lucene99;

import static org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.DYNAMIC_CONFIDENCE_INTERVAL;
import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.QUANTIZED_VECTOR_COMPONENT;
import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.calculateDefaultConfidenceInterval;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
@@ -117,6 +118,9 @@ public Lucene99ScalarQuantizedVectorsWriter(
false,
rawVectorDelegate,
scorer);
if (confidenceInterval != null && confidenceInterval == 0) {
throw new IllegalArgumentException("confidenceInterval cannot be set to zero");
}
}

public Lucene99ScalarQuantizedVectorsWriter(
@@ -347,6 +351,7 @@ private void writeMeta(
meta.writeByte(bits);
meta.writeByte(compress ? (byte) 1 : (byte) 0);
} else {
assert confidenceInterval == null || confidenceInterval != DYNAMIC_CONFIDENCE_INTERVAL;
meta.writeInt(
Float.floatToIntBits(
confidenceInterval == null
@@ -667,22 +672,36 @@ public static ScalarQuantizer mergeAndRecalculateQuantiles(
doc = vectorValues.nextDoc()) {
numVectors++;
}
mergedQuantiles =
confidenceInterval == null
? ScalarQuantizer.fromVectorsAutoInterval(
KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState),
fieldInfo.getVectorSimilarityFunction(),
numVectors,
bits)
: ScalarQuantizer.fromVectors(
KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState),
confidenceInterval,
numVectors,
bits);
return buildScalarQuantizer(
KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState),
numVectors,
fieldInfo.getVectorSimilarityFunction(),
confidenceInterval,
bits);
}
return mergedQuantiles;
}

static ScalarQuantizer buildScalarQuantizer(
FloatVectorValues floatVectorValues,
int numVectors,
VectorSimilarityFunction vectorSimilarityFunction,
Float confidenceInterval,
byte bits)
throws IOException {
if (confidenceInterval != null && confidenceInterval == DYNAMIC_CONFIDENCE_INTERVAL) {
return ScalarQuantizer.fromVectorsAutoInterval(
floatVectorValues, vectorSimilarityFunction, numVectors, bits);
}
return ScalarQuantizer.fromVectors(
floatVectorValues,
confidenceInterval == null
? calculateDefaultConfidenceInterval(floatVectorValues.dimension())
: confidenceInterval,
numVectors,
bits);
}

/**
* Returns true if the quantiles of the new quantization state are too far from the quantiles of
* the existing quantization state. This would imply that floating point values would slightly
@@ -785,14 +804,12 @@ void finish() throws IOException {
}
FloatVectorValues floatVectorValues = new FloatVectorWrapper(floatVectors, normalize);
ScalarQuantizer quantizer =
confidenceInterval == null
? ScalarQuantizer.fromVectorsAutoInterval(
floatVectorValues,
fieldInfo.getVectorSimilarityFunction(),
floatVectors.size(),
bits)
: ScalarQuantizer.fromVectors(
floatVectorValues, confidenceInterval, floatVectors.size(), bits);
buildScalarQuantizer(
floatVectorValues,
floatVectors.size(),
fieldInfo.getVectorSimilarityFunction(),
confidenceInterval,
bits);
minQuantile = quantizer.getLowerQuantile();
maxQuantile = quantizer.getUpperQuantile();
if (infoStream.isEnabled(QUANTIZED_VECTOR_COMPONENT)) {
@@ -62,7 +62,10 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat
@Override
public void setUp() throws Exception {
bits = random().nextBoolean() ? 4 : 7;
confidenceInterval = random().nextBoolean() ? 0.99f : null;
confidenceInterval = random().nextBoolean() ? (0.9f + random().nextFloat() * 0.1f) : null;
if (random().nextBoolean()) {
confidenceInterval = 0f;
}
format =
new Lucene99HnswScalarQuantizedVectorsFormat(
Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN,
@@ -134,15 +137,17 @@ public void testQuantizedVectorsWriteAndRead() throws Exception {
vectors.add(randomVector(dim));
}
ScalarQuantizer scalarQuantizer =
confidenceInterval == null
confidenceInterval != null && confidenceInterval == 0f
? ScalarQuantizer.fromVectorsAutoInterval(
new Lucene99ScalarQuantizedVectorsWriter.FloatVectorWrapper(vectors, normalize),
similarityFunction,
numVectors,
(byte) bits)
: ScalarQuantizer.fromVectors(
new Lucene99ScalarQuantizedVectorsWriter.FloatVectorWrapper(vectors, normalize),
confidenceInterval,
confidenceInterval == null
? Lucene99ScalarQuantizedVectorsFormat.calculateDefaultConfidenceInterval(dim)
: confidenceInterval,
numVectors,
(byte) bits);
float[] expectedCorrections = new float[numVectors];
@@ -60,7 +60,7 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
1,
bits,
compress,
null,
0f,
null);
}
};
@@ -0,0 +1,102 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.codecs.lucene99;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.quantization.ScalarQuantizer;

public class TestLucene99ScalarQuantizedVectorsWriter extends LuceneTestCase {

public void testBuildScalarQuantizerCosine() throws IOException {
assertScalarQuantizer(
new float[] {0.3234983f, 0.6236096f}, 0.9f, (byte) 7, VectorSimilarityFunction.COSINE);
assertScalarQuantizer(
new float[] {0.28759837f, 0.62449116f}, 0f, (byte) 7, VectorSimilarityFunction.COSINE);
assertScalarQuantizer(
new float[] {0.3234983f, 0.6236096f}, 0.9f, (byte) 4, VectorSimilarityFunction.COSINE);
assertScalarQuantizer(
new float[] {0.37247902f, 0.58848244f}, 0f, (byte) 4, VectorSimilarityFunction.COSINE);
}

public void testBuildScalarQuantizerDotProduct() throws IOException {
assertScalarQuantizer(
new float[] {0.3234983f, 0.6236096f}, 0.9f, (byte) 7, VectorSimilarityFunction.DOT_PRODUCT);
assertScalarQuantizer(
new float[] {0.28759837f, 0.62449116f}, 0f, (byte) 7, VectorSimilarityFunction.DOT_PRODUCT);
assertScalarQuantizer(
new float[] {0.3234983f, 0.6236096f}, 0.9f, (byte) 4, VectorSimilarityFunction.DOT_PRODUCT);
assertScalarQuantizer(
new float[] {0.37247902f, 0.58848244f}, 0f, (byte) 4, VectorSimilarityFunction.DOT_PRODUCT);
}

public void testBuildScalarQuantizerMIP() throws IOException {
assertScalarQuantizer(
new float[] {2.0f, 20.0f}, 0.9f, (byte) 7, VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
assertScalarQuantizer(
new float[] {2.4375f, 19.0625f},
0f,
(byte) 7,
VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
assertScalarQuantizer(
new float[] {2.0f, 20.0f}, 0.9f, (byte) 4, VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
assertScalarQuantizer(
new float[] {2.6875f, 19.0625f},
0f,
(byte) 4,
VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
}

public void testBuildScalarQuantizerEuclidean() throws IOException {
assertScalarQuantizer(
new float[] {2.0f, 20.0f}, 0.9f, (byte) 7, VectorSimilarityFunction.EUCLIDEAN);
assertScalarQuantizer(
new float[] {2.125f, 19.375f}, 0f, (byte) 7, VectorSimilarityFunction.EUCLIDEAN);
assertScalarQuantizer(
new float[] {2.0f, 20.0f}, 0.9f, (byte) 4, VectorSimilarityFunction.EUCLIDEAN);
assertScalarQuantizer(
new float[] {2.1875f, 19.0625f}, 0f, (byte) 4, VectorSimilarityFunction.EUCLIDEAN);
}

private void assertScalarQuantizer(
float[] expectedQuantiles,
Float confidenceInterval,
byte bits,
VectorSimilarityFunction vectorSimilarityFunction)
throws IOException {
List<float[]> vectors = new ArrayList<>(30);
for (int i = 0; i < 30; i++) {
float[] vector = new float[] {i, i + 1, i + 2, i + 3};
vectors.add(vector);
}
FloatVectorValues vectorValues =
new Lucene99ScalarQuantizedVectorsWriter.FloatVectorWrapper(
vectors,
vectorSimilarityFunction == VectorSimilarityFunction.COSINE
|| vectorSimilarityFunction == VectorSimilarityFunction.DOT_PRODUCT);
ScalarQuantizer scalarQuantizer =
Lucene99ScalarQuantizedVectorsWriter.buildScalarQuantizer(
vectorValues, 30, vectorSimilarityFunction, confidenceInterval, bits);
assertEquals(expectedQuantiles[0], scalarQuantizer.getLowerQuantile(), 0.0001f);
assertEquals(expectedQuantiles[1], scalarQuantizer.getUpperQuantile(), 0.0001f);
}
}
