Skip to content

Commit

Permalink
Add ability to limit results retrieved from Lucene
Browse files Browse the repository at this point in the history
Allows the records (rids) retrieved from the Lucene search to be limited, where it is known that the remainder of the query does not require the entire set to be loaded.
This is useful when the underlying Lucene query returns many results, but the query overall is only intended to return a small number of them (usually in the ranked order from Lucene).
This mode is opt-in, enabled by providing a "limit" metadata element to the Lucene search function. A value of "select" uses the skip/limit in the SELECT statement to determine the max hits, and an integral value specifies an explicit max hits (e.g. for a safety margin).
  • Loading branch information
timw committed Sep 3, 2024
1 parent aaf84ba commit def5d60
Show file tree
Hide file tree
Showing 8 changed files with 146 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import com.orientechnologies.lucene.engine.OLuceneIndexEngineAbstract;
import com.orientechnologies.lucene.engine.OLuceneIndexEngineUtils;
import com.orientechnologies.lucene.exception.OLuceneIndexException;
import com.orientechnologies.lucene.functions.OLuceneFunctionsUtils;
import com.orientechnologies.lucene.query.OLuceneQueryContext;
import com.orientechnologies.lucene.tx.OLuceneTxChangesAbstract;
import com.orientechnologies.orient.core.command.OCommandContext;
Expand Down Expand Up @@ -67,6 +68,7 @@ public class OLuceneResultSet implements Set<OIdentifiable> {
private int maxNumFragments;
private TopDocs topDocs;
private long deletedMatchCount = 0;
private long returnedHits = 0;

private boolean closed = false;

Expand Down Expand Up @@ -99,6 +101,10 @@ public OLuceneResultSet(
highlighter = new Highlighter(formatter, scorer);

maxNumFragments = (int) Optional.ofNullable(highlight.get("maxNumFragments")).orElse(2);

final Long queryMaxHits = OLuceneFunctionsUtils.getResultLimit(queryContext.getContext());
long maxHits = (queryMaxHits == null) ? Integer.MAX_VALUE : queryMaxHits;
this.returnedHits = Math.min(maxHits, topDocs.totalHits - deletedMatchCount);
}

protected void fetchFirstBatch() {
Expand Down Expand Up @@ -180,7 +186,7 @@ protected long calculateDeletedMatch() {

@Override
public int size() {
return (int) Math.max(0, topDocs.totalHits - deletedMatchCount);
return (int) Math.max(0, this.returnedHits);
}

@Override
Expand All @@ -201,12 +207,15 @@ public OLuceneResultSetIteratorTx() {
localIndex = 0;
scoreDocs = topDocs.scoreDocs;
OLuceneIndexEngineUtils.sendTotalHits(
indexName, queryContext.getContext(), topDocs.totalHits - deletedMatchCount);
indexName,
queryContext.getContext(),
topDocs.totalHits - deletedMatchCount,
returnedHits);
}

@Override
public boolean hasNext() {
final boolean hasNext = index < (totalHits - deletedMatchCount);
final boolean hasNext = (index < returnedHits);
if (!hasNext && !closed) {
final IndexSearcher searcher = queryContext.getSearcher();
engine.release(searcher);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
/** Created by frank on 04/05/2017. */
public class OLuceneIndexEngineUtils {

public static void sendTotalHits(String indexName, OCommandContext context, long totalHits) {
public static void sendTotalHits(
String indexName, OCommandContext context, long totalHits, long returnedHits) {
if (context != null) {

if (context.getVariable("totalHits") == null) {
Expand All @@ -36,6 +37,12 @@ public static void sendTotalHits(String indexName, OCommandContext context, long
context.setVariable("totalHits", null);
}
context.setVariable((indexName + ".totalHits").replace(".", "_"), totalHits);
if (context.getVariable("returnedHits") == null) {
context.setVariable("returnedHits", returnedHits);
} else {
context.setVariable("returnedHits", null);
}
context.setVariable((indexName + ".returnedHits").replace(".", "_"), returnedHits);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,18 @@
import com.orientechnologies.orient.core.db.ODatabaseDocumentInternal;
import com.orientechnologies.orient.core.db.record.OIdentifiable;
import com.orientechnologies.orient.core.metadata.OMetadataInternal;
import com.orientechnologies.orient.core.record.impl.ODocument;
import com.orientechnologies.orient.core.sql.parser.OExpression;
import com.orientechnologies.orient.core.sql.parser.OFromClause;
import com.orientechnologies.orient.core.sql.parser.OSelectStatement;
import org.apache.lucene.index.memory.MemoryIndex;

/** Created by frank on 13/02/2017. */
public class OLuceneFunctionsUtils {
public static final String MEMORY_INDEX = "_memoryIndex";

private static final String MAX_HITS = "luceneMaxHits";

protected static OLuceneFullTextIndex searchForIndex(OExpression[] args, OCommandContext ctx) {
final String indexName = (String) args[0].execute((OIdentifiable) null, ctx);
return getLuceneFullTextIndex(ctx, indexName);
Expand Down Expand Up @@ -57,4 +62,29 @@ public static String doubleEscape(final String s) {
}
return sb.toString();
}

/**
 * Works out how many Lucene hits need to be loaded for a search and, when a positive cap can be
 * derived, stores it in the command context under the {@code MAX_HITS} variable.
 *
 * <p>The cap is taken from the {@code "limit"} property of the search metadata:
 *
 * <ul>
 *   <li>{@code "select"}: when the target's parent node is a SELECT statement that has a
 *       LIMIT, the cap is {@code LIMIT + SKIP}. A SELECT without a LIMIT, or with a negative
 *       LIMIT, produces no cap.
 *   <li>a {@link Number}: the cap is that number's long value.
 * </ul>
 *
 * <p>Any other value, and any non-positive cap, leaves the context untouched so the search stays
 * unlimited.
 *
 * @param target the FROM clause of the statement running the search
 * @param ctx the command context that receives the cap; nothing is done when {@code null}
 * @param metadata the search metadata document; nothing is done when {@code null}
 */
public static void configureResultLimit(
    OFromClause target, OCommandContext ctx, ODocument metadata) {
  if (ctx == null || metadata == null) {
    return;
  }
  final Object limitType = metadata.getProperty("limit");

  long maxHits = 0;
  if ("select".equals(limitType)) {
    if (target != null && target.jjtGetParent() instanceof OSelectStatement) {
      final OSelectStatement select = (OSelectStatement) target.jjtGetParent();
      if (select.getLimit() != null) {
        final long limit = ((Number) select.getLimit().getValue(ctx)).longValue();
        // A negative LIMIT cannot bound the result, so it must not produce a cap.
        if (limit >= 0) {
          long skip = 0;
          if (select.getSkip() != null) {
            skip = Math.max(0, ((Number) select.getSkip().getValue(ctx)).longValue());
          }
          maxHits = limit + skip;
        }
      }
      // SKIP without LIMIT is deliberately ignored: capping the hits at the skip count would
      // load exactly the rows that are then skipped, leaving the query with no results.
    }
  } else if (limitType instanceof Number) {
    maxHits = ((Number) limitType).longValue();
  }
  // Only a positive cap is meaningful; zero or negative values mean "no limit".
  if (maxHits > 0) {
    ctx.setVariable(MAX_HITS, maxHits);
  }
}

/**
 * Reads the Lucene hit cap stored in the command context under the {@code MAX_HITS} variable.
 *
 * @param ctx the command context to read from; may be {@code null}
 * @return the stored cap, or {@code null} when there is no context, no stored value, or the
 *     stored value is not numeric
 */
public static Long getResultLimit(OCommandContext ctx) {
  if (ctx == null) {
    // Other code that receives this context treats it as optional, so do the same here.
    return null;
  }
  final Object stored = ctx.getVariable(OLuceneFunctionsUtils.MAX_HITS);
  if (stored instanceof Long) {
    return (Long) stored;
  }
  if (stored instanceof Number) {
    // Tolerate other numeric types instead of failing with a ClassCastException.
    return ((Number) stored).longValue();
  }
  return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ public Iterable<OIdentifiable> searchFromTarget(
OExpression expression = args[0];

ODocument metadata = parseMetadata(args);
OLuceneFunctionsUtils.configureResultLimit(target, ctx, metadata);

List<String> ridsAsString = parseRids(ctx, expression);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ public Iterable<OIdentifiable> searchFromTarget(
if (index != null) {

ODocument metadata = getMetadata(args, ctx);
OLuceneFunctionsUtils.configureResultLimit(target, ctx, metadata);

List<OIdentifiable> luceneResultSet;
try (Stream<ORID> rids =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ public Iterable<OIdentifiable> searchFromTarget(
if (index != null) {

ODocument meta = getMetadata(args, ctx);
OLuceneFunctionsUtils.configureResultLimit(target, ctx, meta);
Set<OIdentifiable> luceneResultSet;
try (Stream<ORID> rids =
index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ public Iterable<OIdentifiable> searchFromTarget(
if (index != null && query != null) {

ODocument meta = getMetadata(args, ctx);
OLuceneFunctionsUtils.configureResultLimit(target, ctx, meta);

List<OIdentifiable> luceneResultSet;
try (Stream<ORID> rids =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
*
* * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
* *
* * Licensed under the Apache License, Version 2.0 (the "License");
* * you may not use this file except in compliance with the License.
* * You may obtain a copy of the License at
* *
* * http://www.apache.org/licenses/LICENSE-2.0
* *
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS,
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* * See the License for the specific language governing permissions and
* * limitations under the License.
*
*/

package com.orientechnologies.lucene.tests;

import static org.assertj.core.api.Assertions.assertThat;

import com.orientechnologies.orient.core.sql.executor.OResult;
import com.orientechnologies.orient.core.sql.executor.OResultSet;
import java.io.InputStream;
import java.util.List;
import java.util.stream.Collectors;
import org.junit.Before;
import org.junit.Test;

/**
 * Checks the opt-in "limit" metadata of {@code search_class}: the number of rows returned by the
 * query and the {@code $totalHits} / {@code $returnedHits} context variables it exposes.
 */
public class OLuceneLimitResultsTest extends OLuceneBaseTest {

  /** Loads the test script and creates a Lucene full-text index on {@code Song.title}. */
  @Before
  public void init() {
    InputStream stream = ClassLoader.getSystemResourceAsStream("testLuceneIndex.sql");

    db.execute("sql", getScriptFromStream(stream));

    db.command("create index Song.title on Song (title) FULLTEXT ENGINE LUCENE");
  }

  /**
   * Runs {@code query} and verifies the result size and the hit counters projected by the query.
   *
   * @param query the SQL query; it must project {@code $totalHits}, {@code
   *     $Song_title_totalHits}, {@code $returnedHits} and {@code $Song_title_returnedHits}
   * @param expectedResultSetSize expected number of rows returned by the query
   * @param expectedTotalHits expected value of both total-hits variables
   * @param expectedReturnedHits expected value of both returned-hits variables
   */
  private void checkSongTitleHits(
      String query, int expectedResultSetSize, int expectedTotalHits, int expectedReturnedHits) {
    // try-with-resources so the result set is closed even when an assertion below fails.
    try (OResultSet docs = db.query(query)) {
      List<OResult> results = docs.stream().collect(Collectors.toList());
      assertThat(results).hasSize(expectedResultSetSize);

      OResult doc = results.get(0);
      System.out.println("doc.toElement().toJSON() = " + doc.toElement().toJSON());

      assertThat(doc.<Long>getProperty("$totalHits")).isEqualTo(expectedTotalHits);
      assertThat(doc.<Long>getProperty("$Song_title_totalHits")).isEqualTo(expectedTotalHits);
      assertThat(doc.<Long>getProperty("$returnedHits")).isEqualTo(expectedReturnedHits);
      assertThat(doc.<Long>getProperty("$Song_title_returnedHits"))
          .isEqualTo(expectedReturnedHits);
    }
  }

  /** With "limit":"select" the returned hits are expected to equal the SELECT's skip + limit. */
  @Test
  public void testLimitSelect() {
    checkSongTitleHits(
        "select *,$totalHits,$Song_title_totalHits,$returnedHits,$Song_title_returnedHits "
            + "from Song where search_class('title:man', {\"limit\":\"select\"})= true limit 1",
        1,
        14,
        1);

    checkSongTitleHits(
        "select *,$totalHits,$Song_title_totalHits,$returnedHits,$Song_title_returnedHits "
            + "from Song where search_class('title:man', {\"limit\":\"select\"})= true skip 5 limit 5",
        5,
        14,
        10);
  }

  /** With a numeric "limit" the returned hits are expected to equal that number. */
  @Test
  public void testLimitByNumber() {
    checkSongTitleHits(
        "select *,$totalHits,$Song_title_totalHits,$returnedHits,$Song_title_returnedHits from Song "
            + "where search_class('title:man', {\"limit\": 5})= true limit 1",
        1,
        14,
        5);

    checkSongTitleHits(
        "select *,$totalHits,$Song_title_totalHits,$returnedHits,$Song_title_returnedHits from Song "
            + "where search_class('title:man', {\"limit\": 5})= true limit 10",
        5,
        14,
        5);
  }
}

0 comments on commit def5d60

Please sign in to comment.