From a5204d9187468978a5487c4dd75c7c4c3a4d6a66 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Wed, 9 Oct 2024 08:04:46 -0400 Subject: [PATCH] #13867: add tooling to generate int7_hnsw.9.10.zip bwc index --- .../BackwardsCompatibilityTestBase.java | 4 +- .../TestBasicBackwardsCompatibility.java | 2 +- .../TestGenerateBwcIndices.java | 12 +- .../TestInt7HnswBackwardsCompatibility.java | 150 ++++++++++++++++++ .../backward_index/int7_hnsw.9.10.0.zip | Bin 0 -> 5861 bytes 5 files changed, 164 insertions(+), 4 deletions(-) create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestInt7HnswBackwardsCompatibility.java create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int7_hnsw.9.10.0.zip diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/BackwardsCompatibilityTestBase.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/BackwardsCompatibilityTestBase.java index 2736a128dbe9..b4774155a373 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/BackwardsCompatibilityTestBase.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/BackwardsCompatibilityTestBase.java @@ -105,8 +105,8 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase { * This is a base constructor for parameterized BWC tests. The constructor arguments are provided * by {@link com.carrotsearch.randomizedtesting.RandomizedRunner} during test execution. A {@link * com.carrotsearch.randomizedtesting.annotations.ParametersFactory} specified in a subclass - * provides a list lists of arguments for the tests and RandomizedRunner will execute the test for - * each of the argument list. + * provides a list of arguments for the tests and RandomizedRunner will execute the test for each + * of the argument list. * * @param version the version this test should run for * @param indexPattern an index pattern in order to open an index of see {@link diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java index 1217c064c0e9..c5e7b213d0a7 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java @@ -511,7 +511,7 @@ private static void doTestHits(ScoreDoc[] hits, int expectedCount, IndexReader r } } - private static ScoreDoc[] assertKNNSearch( + static ScoreDoc[] assertKNNSearch( IndexSearcher searcher, float[] queryVector, int k, diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java index c7b1ea3fb4a9..6989731ae141 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java @@ -39,7 +39,7 @@ public class TestGenerateBwcIndices extends LuceneTestCase { // To generate backcompat indexes with the current default codec, run the following gradle // command: // gradlew test -Ptests.bwcdir=/path/to/store/indexes -Ptests.codec=default - // -Ptests.useSecurityManager=false --tests TestGenerateBwcIndices + // -Ptests.useSecurityManager=false --tests TestGenerateBwcIndices --max-workers=1 // // Also add testmethod with one of the index creation methods below, for example: // -Ptestmethod=testCreateCFS @@ -82,6 +82,16 @@ public void testCreateSortedIndex() throws IOException { sortedTest.createBWCIndex(); } + public void testCreateInt7HNSWIndices() throws IOException { + TestInt7HnswBackwardsCompatibility int7HnswBackwardsCompatibility = + new TestInt7HnswBackwardsCompatibility( + Version.LATEST, + createPattern( + TestInt7HnswBackwardsCompatibility.INDEX_NAME, + TestInt7HnswBackwardsCompatibility.SUFFIX)); + int7HnswBackwardsCompatibility.createBWCIndex(); + } + private boolean isInitialMajorVersionRelease() { return Version.LATEST.equals(Version.fromBits(Version.LATEST.major, 0, 0)); } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestInt7HnswBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestInt7HnswBackwardsCompatibility.java new file mode 100644 index 000000000000..bdf43378afaa --- /dev/null +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestInt7HnswBackwardsCompatibility.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.backward_index; + +import static org.apache.lucene.backward_index.TestBasicBackwardsCompatibility.assertKNNSearch; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import java.io.IOException; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.analysis.MockAnalyzer; +import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.util.Version; + +public class TestInt7HnswBackwardsCompatibility extends BackwardsCompatibilityTestBase { + + static final String INDEX_NAME = "int7_hnsw"; + static final String SUFFIX = ""; + private static final Version FIRST_INT7_HNSW_VERSION = Version.LUCENE_9_10_0; + private static final String KNN_VECTOR_FIELD = "knn_field"; + private static final int DOC_COUNT = 30; + private static final FieldType KNN_VECTOR_FIELD_TYPE = + KnnFloatVectorField.createFieldType(3, VectorSimilarityFunction.COSINE); + private static final float[] KNN_VECTOR = {0.2f, -0.1f, 0.1f}; + + public TestInt7HnswBackwardsCompatibility(Version version, String pattern) { + super(version, pattern); + } + + /** Provides all sorted versions to the test-framework */ + @ParametersFactory(argumentFormatting = "Lucene-Version:%1$s; Pattern: %2$s") + public static Iterable testVersionsFactory() throws IllegalAccessException { + return allVersion(INDEX_NAME, SUFFIX); + } + + protected Codec getCodec() { + return new Lucene99Codec() { + @Override + public KnnVectorsFormat getKnnVectorsFormatForField(String field) { + return new Lucene99HnswScalarQuantizedVectorsFormat( + Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN, + Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH); + } + }; + } + + @Override + protected boolean supportsVersion(Version version) { + return version.onOrAfter(FIRST_INT7_HNSW_VERSION); + } + + @Override + void verifyUsesDefaultCodec(Directory dir, String name) throws IOException { + // We don't use the default codec + } + + public void testInt7HnswIndexAndSearch() throws Exception { + IndexWriterConfig indexWriterConfig = + newIndexWriterConfig(new MockAnalyzer(random())) + .setOpenMode(IndexWriterConfig.OpenMode.APPEND) + .setCodec(getCodec()) + .setMergePolicy(newLogMergePolicy()); + try (IndexWriter writer = new IndexWriter(directory, indexWriterConfig)) { + // add 10 docs + for (int i = 0; i < 10; i++) { + writer.addDocument(knnDocument(i + DOC_COUNT)); + if (random().nextBoolean()) { + writer.flush(); + } + } + if (random().nextBoolean()) { + writer.forceMerge(1); + } + writer.commit(); + try (IndexReader reader = DirectoryReader.open(directory)) { + IndexSearcher searcher = new IndexSearcher(reader); + assertKNNSearch(searcher, KNN_VECTOR, 1000, DOC_COUNT + 10, "0"); + assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0"); + } + } + // This will confirm the docs are really sorted + TestUtil.checkIndex(directory); + } + + @Override + protected void createIndex(Directory dir) throws IOException { + IndexWriterConfig conf = + new IndexWriterConfig(new MockAnalyzer(random())) + .setMaxBufferedDocs(10) + .setCodec(getCodec()) + .setMergePolicy(NoMergePolicy.INSTANCE); + try (IndexWriter writer = new IndexWriter(dir, conf)) { + for (int i = 0; i < DOC_COUNT; i++) { + writer.addDocument(knnDocument(i)); + } + writer.forceMerge(1); + } + try (DirectoryReader reader = DirectoryReader.open(dir)) { + IndexSearcher searcher = new IndexSearcher(reader); + assertKNNSearch(searcher, KNN_VECTOR, 1000, DOC_COUNT, "0"); + assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0"); + } + } + + private static Document knnDocument(int id) { + Document doc = new Document(); + float[] vector = {KNN_VECTOR[0], KNN_VECTOR[1], KNN_VECTOR[2] + 0.1f * id}; + doc.add(new KnnFloatVectorField(KNN_VECTOR_FIELD, vector, KNN_VECTOR_FIELD_TYPE)); + doc.add(new StringField("id", Integer.toString(id), Field.Store.YES)); + return doc; + } + + public void testReadOldIndices() throws Exception { + try (DirectoryReader reader = DirectoryReader.open(directory)) { + IndexSearcher searcher = new IndexSearcher(reader); + assertKNNSearch(searcher, KNN_VECTOR, 1000, DOC_COUNT, "0"); + assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0"); + } + } +} diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int7_hnsw.9.10.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int7_hnsw.9.10.0.zip new file mode 100644 index 0000000000000000000000000000000000000000..0425b451fa0c8998731c22f27728c936b5083b41 GIT binary patch literal 5861 zcmc(jXHb({w}9Ueh!g`sdK0OFfl!nZ0YN~j^j@SFL8MC&q<2BOgkB^8MFa#Cq$40o z7Z3&My$DjIS-1f`bHRJ`^UR$2ZZb*U{MgUzwchL}Yb{kpFfJjm|JaU|(>u8M@kR~M z0|*3u)^U&UtZ#S_0U+E@AOOIf0|S66 zmPn3e-~**tB8`9I^dj~f3I_lnSfVj6!raR8VEz8uzgb^em%)Nq)<{W9NwT#WOUpEf zF-}RENYPPGNY-x$a0!iPf)-06TB_buz2wi#td8}MzQn_gZuA4;&}W_V7419T<^isW zYY)K5uQ(>SlkjDVLDk84Y1aiw%`_pe>KTJ(!&7Zw&(FGG7%yw&arfjBWVq<0` zZ>43cpljx~JGgZ)^Aje5CuD}_JVoIx=sywrW zqq)bA(u zASA*=%dUC8_yjzj$xlUAX1zwM({C&VUF#yg1m@}*L=`mlmBFS1mFuSRV$OXtq_yeq z-+shqXm!(Nocrdq#JAbvI(Sn^AgK_e#pJR{wMCQudLtru`rYP(6UM`glsNH5v6eLt zg%hJ>tBQHw=D#V~K~;EMkQU^B7mvmml@~qm zHtbZD*d=*L%hNkzTH@w*W9zLgkpCzR?W)_#+&Kh~F1@Rs2U?$v#f^np>ACK(#Jv*X zn5hZIA%|Yo^$!-Dr>Q@6U3SpFFQ6}?jLe4WwSBJGu8awbli{xA1@w4@A05pY%USog zfFzj{!9;>_qpZ5L84=9{!S-{XRqehm#cVx6R8qqW0N}(2X$FN9<47_DojxT>;Gdy? zPhNNQuDMr2NL%FXibfw!o<-=B4U?0gyC;Yf%rDVK_R5qk6$FguN3%9=ZR(7=Y}+cf z*F`+so{N)DSAU*=*CJoiW~nXNqZGbn^{noyhNcb>$=dj;N=ck>2Y!CQe8%B@+ISc4 z#Y>XGri~t4lx-AtU^!mJ07_rqk-p9vntQ_d4SY{wZkGCE!kwf;J{-O&(MV!G;tZNa z7pQwfO*XUW+=yzhTMkhcE0^1EM2o6rK=9~g&$0y8bFe)r}I8>#5F}rkmbc`n86h zb*9!*%gC=Lo(OJ(E>>n^@mCC(rmBx2?V4;xMGmbN$Ao5^#cB7ZS8kgF(K)^MnY_*? z>R%cxc=sx$JQIN!B*;uFtnc++aDLKn<^=O}-o&Mq+dv0#?jggfFfJzVIA97|5qY_cTlivQ zGh28iUUkTf3?;h^#wvsoV?}A#yo0{<^8P~`B)XZ#8)=gF+_hA1frfUD#t}3Oq8JzG zE$uryfLHlSbs#30dqjc<$(=|cM`0}~+#I>)xR*3Vg~n7^VrFv5tEQ=@+3(Ix&*bGg zBP$`HO~;N^JbT7V-;Mf$vtSc)f4#!|rwm#!xj>HCp6_3_eegfAedU4eDO4Ly{;}-| ziP^(y!&SdRa_7603;+R>ecLnqWqZ(f+Y1Nc^#gsy?MehD!C@=RUH-v1cCcFqx(7el zJFvaVciW5ou)T(=H{z#MSrZMds$gxu`IqhYQ}rKgZ!D*wMqM}izHlZY#x=!vCNlyC zt{mu?CrnA^X2ZK4$J(bqRKx#%*{rIKsbt=KOlnLK!(-W}NII8~kC2DdTwdh9SwKY; z!2I|W;ZlZYr`u*1xu|2^`mk5$%4eU^l`oY~+-uifXA~R6;KJR{jMdTND`qGsEt&Oc zKNS>tn=2r!@?uG5vqRJHtA)mFQtXaf(b#&>Gqz~h{HF98GbJ77!r6+#mZ&G574JT^o^b=vEI77>D zne($JY@l2`UMFvWdoT3v_{lM3#%!u5nZt~M2hr%Or>ojvTsE&3YHl0X+HVo&PaSJ* zbM8qI;!ANBE2y!TVPjK( z-|-+Ir$sW~UzKPaf$uJrWlpJ=(w8#-*?5WBNG7gn+2tjTnpf}_x*K$5F#0S@ZAO)# z{(|B2_CdrxXqwLUi~^-qc$dpb-qQDM^o#>J^f%Mu%pV&Jzf4esaNp;NXFvuHIFa~e zSb6MJ#J0WRvA*K2TL*8+^=XD(HABhpjhpi)4~CPRLUSqU?=22>Y&;W!QKs^}9Y5i{ zX+e;~F?h$Vog>lf-V<)S(8Sw9BN$KXVHVW5)fT>_R_v7o&r~+4DR@jkH2B8qcvFlK zi6`;e8*TFFauJB8E~!ZSL?GDk$`xwjtCxL1pUwrU+1io(B(xd%hNdI#=l_@8pZ@>m zev1_SZNRpkeXTlCBoI$ae81Ai-LLdxt$nfqI_XiSwT;uGeha>V_~q5_$(CSu=+7yW z?58Tx`YLQb4*XN9G}efgqz}7)KUKeXe`1#kw$gv$Ze_rw1-r;9T@b|=0t+0vlC@z5 zAA`VHFw!_l&z@9avWIP}qM%HOpxfeMRLat3tY^Y+GVzh}&{cTAdmL9uc-iSIP}KKi z8q)bo(_aU~@XC*-erVsS>8egn|Kxk8pW%a2sHN7X6GQm+2WkB%SA~=&RL9p_R#`8N z=$_@?Mr=;@1?(h^rujy;Q{8jsid)cTamI+mao*RpMd@2Q7NAg{qc#LSlt_D2h;z1m zb>^?VFBCeiT|zdy8eGvR-16L$JTD`1i%nf$vnG*NPDTLvE*U>bGCQZI;Q`fX(Zmfc zD(4$A&j)>rZH%P|D%Gjqxc z7H}$XLI)Vh2kK%$kBat>xI)a@>D zTn`nMhHDU^N)HlHaxH98c4}&BbT;z5T+O*t0HJ)}r=7*i7npuq=8M+J97vKbor z@ku1cOz!H^+J5~ob$*!{K_$=J!diH0fueHb;xL#p4qVnT!xLwkjv1M>8@H^ zywj|sGjXflqS-)L&`K7WCd~H2?)vg=?WE&R^!F}5p@QBqBk_x8uZ~@7H?4)JN2Q-B z=|V?_SE`?hm>_|OLfYoPT}dc^xZE{x;sc$H`~aQA#sKiiM5!@h>M?L9uP4* zpOu+R5v7o&bK#n-{YUxEj*%7KGk)L3wCb}4R<}N_ldyHF^wgA)e8k*D4tbd(Qw7M; zToSu&@1(SiVYUyyZMfF-3#~)e21^q*>CqG%h0(7eI2#d*jJ_!)oeUU<&S9zH2XPX| z^0o;Brk_8cRqtk95PR>Jw>xE_d{a-0NP8$j0j_WkT=Bu0136GUw9EH!hbc@3z}?wE zoJgw%@Uj_LH{5#hS#B226L#njFKnzoz2jy4?I+=DgcE2S5kCLFC;asPmhff%k?;jZ zphbXvuuDe}cl#wp}8`wo5LS*4HiVU0sa%{!`D? zsrJ2BQg5%XuSb8hRIWH#aJBkcO)A&=&dVxlD6*1TnC6sUAdMU^7!Nf8?IyRM`pBuI z+@V-r_JsuR6m+i#kc%W$nzbtKxx5;3b;6)k(21^3n&sT;2aD7SvWYz~ZM?BWV8D1$ zf+FZ(+uvtg`xB`j>SLm{chW;iXfbm;9pJ3sDD6Vz=wPO@FXRM>W)M3`OV?S7b9OBVJ@Gxr^d-t*X`tuNv zRz!zUqQ|kQANA3ng?O}NILxw!{$Txah)30Tn6(HyVEuWBM;&k&#eD)eVuas?`K!Zu em^X#x9X_hR;^AR`JpgbV`^>{uSR7>g-~I<9>@NZU literal 0 HcmV?d00001