Skip to content

Commit

Permalink
Use Arrays.compareUnsigned instead of iterating compare. (apache#13252)
Browse files Browse the repository at this point in the history
  • Loading branch information
vsop-479 authored Apr 19, 2024
1 parent 1f1181a commit 3024e66
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 41 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,8 @@ Improvements
Optimizations
---------------------

* GITHUB#13252: Replace hand-written byte comparison loops with Arrays.compareUnsigned in SegmentTermsEnum. (zhouhui)

* GITHUB#12996: Reduce ArrayUtil#grow in decompress. (Zhang Chao)

* GITHUB#13115: Short circuit queued flush check when flush on update is disabled (Prabhat Sharma)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.io.IOException;
import java.io.PrintStream;
import java.util.Arrays;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
Expand Down Expand Up @@ -387,31 +388,18 @@ public boolean seekExact(BytesRef target) throws IOException {
}

if (cmp == 0) {
final int targetUptoMid = targetUpto;

// Second compare the rest of the term, but
// don't save arc/output/frame; we only do this
// to find out if the target term is before,
// equal or after the current term
final int targetLimit2 = Math.min(target.length, term.length());
while (targetUpto < targetLimit2) {
cmp =
(term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" +
// targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset +
// targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
// }
if (cmp != 0) {
break;
}
targetUpto++;
}

if (cmp == 0) {
cmp = term.length() - target.length;
}
targetUpto = targetUptoMid;
cmp =
Arrays.compareUnsigned(
term.bytes(),
targetUpto,
term.length(),
target.bytes,
target.offset + targetUpto,
target.offset + target.length);
}

if (cmp < 0) {
Expand Down Expand Up @@ -666,28 +654,16 @@ public SeekStatus seekCeil(BytesRef target) throws IOException {
}

if (cmp == 0) {
final int targetUptoMid = targetUpto;
// Second compare the rest of the term, but
// don't save arc/output/frame:
final int targetLimit2 = Math.min(target.length, term.length());
while (targetUpto < targetLimit2) {
cmp =
(term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit
// + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto])
// + " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")");
// }
if (cmp != 0) {
break;
}
targetUpto++;
}

if (cmp == 0) {
cmp = term.length() - target.length;
}
targetUpto = targetUptoMid;
cmp =
Arrays.compareUnsigned(
term.bytes(),
targetUpto,
term.length(),
target.bytes,
target.offset + targetUpto,
target.offset + target.length);
}

if (cmp < 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,42 @@ public void testGhosts() throws Exception {
dir.close();
}

// Test seek in disorder: index a fixed range of numeric ids as string terms, force-merge to a
// single segment, then seek to randomly chosen existing ids on one shared TermsEnum. Every
// seekExact must return true and every seekCeil must return FOUND, and after each seek the
// enum must be positioned on the requested term.
public void testDisorder() throws Exception {
  final int docCount = 10000;
  final int seekCount = 20000;

  // try-with-resources closes reader, then writer, then directory (reverse declaration
  // order), and does so even when an assertion below fails.
  try (Directory dir = newDirectory()) {
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    iwc.setCodec(getCodec());
    iwc.setMergePolicy(newTieredMergePolicy());

    try (IndexWriter iw = new IndexWriter(dir, iwc)) {
      for (int i = 0; i < docCount; i++) {
        Document document = new Document();
        document.add(new StringField("id", Integer.toString(i), Field.Store.NO));
        iw.addDocument(document);
      }
      iw.commit();
      // Merge down to one segment so getOnlyLeafReader can be used below.
      iw.forceMerge(1);

      try (DirectoryReader reader = DirectoryReader.open(iw)) {
        TermsEnum termsEnum = getOnlyLeafReader(reader).terms("id").iterator();

        for (int i = 0; i < seekCount; i++) {
          // Every id in [0, docCount) was indexed, so each target is an existing term.
          int n = random().nextInt(0, docCount);
          BytesRef target = new BytesRef(Integer.toString(n));
          // seekExact.
          assertTrue(termsEnum.seekExact(target));
          assertEquals(target, termsEnum.term());
          // seekCeil.
          assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(target));
          assertEquals(target, termsEnum.term());
        }
      }
    }
  }
}

// Hook that receives a TermsEnum; the default body is empty, so it does nothing here.
// NOTE(review): presumably subclasses override this to run extra checks from the
// binary-search tests (e.g. testBinarySearchTermLeaf) -- confirm against the callers.
protected void subCheckBinarySearch(TermsEnum termsEnum) throws Exception {}

public void testBinarySearchTermLeaf() throws Exception {
Expand Down

0 comments on commit 3024e66

Please sign in to comment.