Skip to content

Commit

Permalink
Optimize DFS while marking connected components (#14022) (#14105)
Browse files Browse the repository at this point in the history
* Optimize DFS while marking connected components (#14022)

* Add CHANGES.txt entry for HNSW DFS Optimization #14022
  • Loading branch information
viswanathk authored Jan 7, 2025
1 parent 7146046 commit c795d74
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ Optimizations
* GITHUB#14040: Specialized top-level DisjunctionMaxQuery evaluation when the
tie break multiplier is 0. (Adrien Grand)

* GITHUB#14022: Optimize DFS marking of connected components in HNSW by not pushing neighbors that are already
marked as connected or already on the stack; this keeps the stack small, improving performance and reducing
allocations. (Viswanath Kuchibhotla)

Bug Fixes
---------------------
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
Expand Down
14 changes: 12 additions & 2 deletions lucene/core/src/java/org/apache/lucene/util/hnsw/HnswUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.internal.hppc.IntHashSet;
import org.apache.lucene.util.FixedBitSet;

/** Utilities for use in tests involving HNSW graphs */
Expand Down Expand Up @@ -105,7 +106,9 @@ static List<Component> components(
} else {
entryPoint = connectedNodes.nextSetBit(0);
}
components.add(new Component(entryPoint, total));
if (total > 0) {
components.add(new Component(entryPoint, total));
}
if (level == 0) {
int nextClear = nextClearBit(connectedNodes, 0);
while (nextClear != NO_MORE_DOCS) {
Expand Down Expand Up @@ -163,6 +166,10 @@ private static Component markRooted(
throws IOException {
// Start at entry point and search all nodes on this level
// System.out.println("markRooted level=" + level + " entryPoint=" + entryPoint);
if (connectedNodes.get(entryPoint)) {
return new Component(entryPoint, 0);
}
IntHashSet nodesInStack = new IntHashSet();
Deque<Integer> stack = new ArrayDeque<>();
stack.push(entryPoint);
int count = 0;
Expand All @@ -178,7 +185,10 @@ private static Component markRooted(
int friendCount = 0;
while ((friendOrd = hnswGraph.nextNeighbor()) != NO_MORE_DOCS) {
++friendCount;
stack.push(friendOrd);
if (connectedNodes.get(friendOrd) == false && nodesInStack.contains(friendOrd) == false) {
stack.push(friendOrd);
nodesInStack.add(friendOrd);
}
}
if (friendCount < maxConn && notFullyConnected != null) {
notFullyConnected.set(node);
Expand Down

0 comments on commit c795d74

Please sign in to comment.