Skip to content

Commit

Permalink
[fix][dingo-executor] Fix text_search with hint does not work
Browse files Browse the repository at this point in the history
  • Loading branch information
zhaoyuan2024 authored and githubgxll committed Oct 22, 2024
1 parent 2ea60ce commit e72f128
Show file tree
Hide file tree
Showing 15 changed files with 690 additions and 51 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Copyright 2021 DataCanvas
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.dingodb.calcite.rel;

import io.dingodb.calcite.utils.RelDataTypeUtils;
import io.dingodb.calcite.visitor.DingoRelVisitor;
import io.dingodb.common.CommonId;
import io.dingodb.common.type.TupleMapping;
import io.dingodb.meta.entity.Table;
import lombok.Getter;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptCost;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelOptTable;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeFieldImpl;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.type.SqlTypeName;
import org.checkerframework.checker.nullness.qual.NonNull;
import org.checkerframework.checker.nullness.qual.Nullable;

import java.util.List;

public class DingoGetDocumentPreFilter extends Filter implements DingoRel {
@Getter
protected CommonId indexTableId;

@Getter
protected Table indexTable;

@Getter
protected Integer documentPriIdIndex;

@Getter
protected Integer documentIndex;

@Getter
protected final RelOptTable table;

@Getter
protected final List<Object> operands;

@Getter
protected final TupleMapping selection;

public DingoGetDocumentPreFilter(RelOptCluster cluster, RelTraitSet traits,
RelNode child,
RexNode condition,
RelOptTable table,
List<Object> operands,
Integer documentIdIndex,
Integer documentIndex,
CommonId indexTableId,
TupleMapping selection,
Table indexTable) {
super(cluster, traits, child, condition);
this.table = table;
this.operands = operands;
this.documentPriIdIndex = documentIdIndex;
this.documentIndex = documentIndex;
this.indexTableId = indexTableId;
this.selection = selection;
this.indexTable = indexTable;
}

@Override
public Filter copy(RelTraitSet traitSet, RelNode input, RexNode condition) {
return new DingoGetDocumentPreFilter(
getCluster(),
traitSet,
input,
condition,
table, operands, documentPriIdIndex, documentIndex, indexTableId, selection, indexTable);
}

@Override
public @Nullable RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
double rowCount = mq.getRowCount(this.getInput());
if (rowCount < 1000) {
rowCount = 1;
} else {
rowCount = 10000D;
}
return DingoCost.FACTORY.makeCost(rowCount, 0, 0);
}

@Override
protected RelDataType deriveRowType() {
return RelDataTypeUtils.mapType(
getCluster().getTypeFactory(),
getTableType(),
selection
);
}

public RelDataType getTableType() {
RelDataType relDataType = table.getRowType();
RelDataTypeFactory.Builder builder = getCluster().getTypeFactory().builder();
builder.addAll(relDataType.getFieldList());
builder.add(new RelDataTypeFieldImpl(
indexTable.getName() + "$rank_bm25",
relDataType.getFieldCount(),
getCluster().getTypeFactory().createSqlType(SqlTypeName.get("FLOAT"))));
return builder.build();
}

@Override
public <T> T accept(@NonNull DingoRelVisitor<T> visitor) {
return visitor.visit(this);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import io.dingodb.calcite.rel.DingoGetByIndex;
import io.dingodb.calcite.rel.DingoGetByIndexMerge;
import io.dingodb.calcite.rel.DingoGetByKeys;
import io.dingodb.calcite.rel.DingoGetDocumentPreFilter;
import io.dingodb.calcite.rel.DingoTableScan;
import io.dingodb.calcite.rel.DingoDocument;
import io.dingodb.calcite.rel.LogicalDingoDocument;
Expand All @@ -35,6 +36,7 @@
import io.dingodb.common.util.Pair;
import io.dingodb.meta.entity.Column;
import io.dingodb.meta.entity.IndexTable;
import io.dingodb.meta.entity.IndexType;
import io.dingodb.meta.entity.Table;
import lombok.extern.slf4j.Slf4j;
import org.apache.calcite.plan.RelOptRuleCall;
Expand All @@ -52,6 +54,7 @@
import java.util.stream.Collectors;

import static io.dingodb.calcite.rel.LogicalDingoTableScan.dispatchDistanceCondition;
import static io.dingodb.calcite.rule.DingoGetByIndexRule.eliminateSpecialCast;
import static io.dingodb.calcite.rule.DingoGetByIndexRule.filterIndices;
import static io.dingodb.calcite.rule.DingoGetByIndexRule.filterScalarIndices;
import static io.dingodb.calcite.rule.DingoGetByIndexRule.getScalaIndices;
Expand All @@ -72,13 +75,81 @@ protected DingoDocumentIndexRule(Config config) {
/**
 * Replaces the matched {@code DingoDocument} with the plan built by
 * {@code getDingoGetDocumentPreFilter}; does nothing when no such plan exists.
 *
 * @param call rule call whose first operand is the {@code DingoDocument} node
 */
@Override
public void onMatch(RelOptRuleCall call) {
    DingoDocument document = call.rel(0);
    RelNode relNode = getDingoGetDocumentPreFilter(document.getFilter(), document, false);
    if (relNode == null) {
        // No pre-filter plan applies: leave the matched node untouched.
        return;
    }
    call.transformTo(relNode);
}

/**
 * Builds the pre-filter plan for a document (text search) node.
 *
 * <p>A plan produced by {@code prePrimaryOrScalarPlan} takes precedence. Otherwise,
 * when the first hint is {@code text_search_pre} or {@code forJoin} is set, a table
 * scan carrying {@code condition} is wrapped in a {@code DocumentStreamConvertor}
 * and a {@code DingoGetDocumentPreFilter}.
 *
 * @param condition filter condition, may be {@code null}
 * @param document  document node being planned
 * @param forJoin   whether the caller is planning a join
 * @return the pre-filter plan, or {@code null} when none applies
 */
public static RelNode getDingoGetDocumentPreFilter(RexNode condition, LogicalDingoDocument document, boolean forJoin) {
    final DingoTable dingoTable = document.getTable().unwrap(DingoTable.class);
    assert dingoTable != null;
    final TupleMapping selection = getDefaultSelection(dingoTable);

    if (condition != null) {
        dispatchDistanceCondition(condition, selection, dingoTable);
    }

    // Positions of the document id / document columns of the text index.
    final Pair<Integer, Integer> textIdPair = getTextIdIndex(dingoTable);
    assert textIdPair != null;
    final RelTraitSet traitSet = document.getTraitSet().replace(DingoRelStreaming.of(document.getTable()));

    // Pre-filtering is requested through the first hint only.
    boolean preFilter = false;
    if (document.getHints() != null && !document.getHints().isEmpty()) {
        preFilter = "text_search_pre".equalsIgnoreCase(document.getHints().get(0).hintName);
    }

    // A primary-key point get (or scalar index plan) wins when it applies.
    final RelNode shortcutPlan = prePrimaryOrScalarPlan(
        condition, document, textIdPair, traitSet, selection, preFilter);
    if (shortcutPlan != null) {
        return shortcutPlan;
    }

    if (!(preFilter || forJoin)) {
        return null;
    }

    // Pre-filtering:
    //   1. scan the table for the target original columns and cache them;
    //   2. text search by document id, yielding document id and score;
    //   3. merge cached rows and document scores by document id.
    final DingoTableScan filteredScan = new DingoTableScan(
        document.getCluster(),
        traitSet,
        ImmutableList.of(),
        document.getTable(),
        condition,
        selection,
        null,
        null,
        null,
        true,
        false
    );

    final DocumentStreamConvertor convertor = new DocumentStreamConvertor(
        document.getCluster(),
        document.getTraitSet(),
        filteredScan,
        document.getIndexTableId(),
        textIdPair.getKey(),
        document.getIndexTable(),
        false
    );

    final DingoGetDocumentPreFilter preFilterNode = new DingoGetDocumentPreFilter(
        document.getCluster(),
        traitSet,
        convertor,
        condition,
        document.getTable(),
        document.getOperands(),
        textIdPair.getKey(),
        textIdPair.getValue(),
        document.getIndexTableId(),
        document.getSelection(),
        document.getIndexTable()
    );
    return preFilterNode;
}

private static DingoGetByIndex preScalarRelNode(LogicalDingoDocument dingoDocument,
IndexValueMapSet<Integer, RexNode> indexValueMapSet,
Table td,
Expand Down Expand Up @@ -139,19 +210,80 @@ default DingoDocumentIndexRule toRule() {
return new DingoDocumentIndexRule(this);
}
}
private static RelNode prePrimaryOrScalarPlan(
RexNode condition,
LogicalDingoDocument document,
Pair<Integer, Integer> documentIdPair,
RelTraitSet traitSet,
TupleMapping selection,
boolean preFilter) {
if (condition == null) {
return null;
}
DingoTable dingoTable = document.getTable().unwrap(DingoTable.class);
RexNode rexNode = RexUtil.toDnf(document.getCluster().getRexBuilder(), condition);
rexNode = eliminateSpecialCast(rexNode, document.getCluster().getRexBuilder());
IndexValueMapSetVisitor visitor = new IndexValueMapSetVisitor(document.getCluster().getRexBuilder());
IndexValueMapSet<Integer, RexNode> indexValueMapSet = rexNode.accept(visitor);
assert dingoTable != null;
final Table td = dingoTable.getTable();
List<Integer> keyIndices = Arrays.stream(td.keyMapping().getMappings()).boxed().collect(Collectors.toList());

Set<Map<Integer, RexNode>> keyMapSet = filterIndices(indexValueMapSet, keyIndices, selection);

RelNode scan = null;
if (keyMapSet != null) {
scan = new DingoGetByKeys(
document.getCluster(),
document.getTraitSet(),
ImmutableList.of(),
document.getTable(),
condition,
selection,
keyMapSet
);
} else if (preFilter) {
scan = preScalarRelNode(document, indexValueMapSet, td, selection, condition);
}

private static Pair<Integer, Integer> getDocumentIndex(DingoTable dingoTable, int dimension) {
if (scan == null) {
return null;
}
DocumentStreamConvertor documentStreamConvertor = new DocumentStreamConvertor(
document.getCluster(),
document.getTraitSet(),
scan,
document.getIndexTableId(),
documentIdPair.getKey(),
document.getIndexTable(),
false);
DingoGetDocumentPreFilter dingoGetDocumentPreFilter = new DingoGetDocumentPreFilter(
document.getCluster(),
traitSet,
documentStreamConvertor,
condition,
document.getTable(),
document.getOperands(),
documentIdPair.getKey(),
documentIdPair.getValue(),
document.getIndexTableId(),
document.getSelection(),
document.getIndexTable()
);
RelTraitSet traits = document.getCluster().traitSet()
.replace(DingoConvention.INSTANCE)
.replace(DingoRelStreaming.ROOT);
return new DingoStreamingConverter(document.getCluster(),
traits, dingoGetDocumentPreFilter);
}
private static Pair<Integer, Integer> getTextIdIndex(DingoTable dingoTable) {
List<IndexTable> indexes = dingoTable.getTable().getIndexes();
for (IndexTable index : indexes) {

if (!index.getIndexType().isVector) {
if (index.getIndexType() != IndexType.DOCUMENT) {
continue;
}

int dimension1 = Integer.parseInt(index.getProperties().getProperty("dimension"));
if (dimension != dimension1) {
continue;
}
String documentIdColName = index.getColumns().get(0).getName();
String documentColName = index.getColumns().get(1).getName();
int documentIdIndex = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,18 @@ public final class DingoRules {
/** Rule instance created from {@code DingoTableCollationRule.Config.SORT_REMOVE_DINGO_SCAN}. */
public static final DingoTableCollationRule SORT_REMOVE_DINGO_SCAN
= DingoTableCollationRule.Config.SORT_REMOVE_DINGO_SCAN.toRule();

/** Rule instance created from the default {@code DingoDocumentIndexRule} configuration. */
public static final DingoDocumentIndexRule DINGO_DOCUMENT_INDEX_RULE
= DingoDocumentIndexRule.Config.DEFAULT.toRule();

/** Rule instance created from the default {@code DingoDocumentJoinRule} configuration. */
public static final DingoDocumentJoinRule DINGO_DOCUMENT_JOIN_RULE
= DingoDocumentJoinRule.Config.DEFAULT.toRule();

/** Rule instance created from the default {@code DingoDocumentFilterRule} configuration. */
public static final DingoDocumentFilterRule DINGO_DOCUMENT_FILTER_RULE
= DingoDocumentFilterRule.Config.DEFAULT.toRule();

/** Rule instance created from the default {@code DingoDocumentProjectRule} configuration. */
public static final DingoDocumentProjectRule DINGO_DOCUMENT_PROJECT_RULE
= DingoDocumentProjectRule.Config.DEFAULT.toRule();

private static final List<RelOptRule> rules = ImmutableList.of(
CoreRules.AGGREGATE_EXPAND_DISTINCT_AGGREGATES,
CoreRules.AGGREGATE_EXPAND_DISTINCT_AGGREGATES_TO_JOIN,
Expand Down Expand Up @@ -228,7 +240,12 @@ public final class DingoRules {
DINGO_INDEX_COLLATION_RULE,
INDEXSCAN_SORT_ASC,
INDEX_NONLEFT_ORDER,
SORT_REMOVE_DINGO_SCAN
SORT_REMOVE_DINGO_SCAN,
DINGO_DOCUMENT_INDEX_RULE,
DINGO_DOCUMENT_JOIN_RULE,
DINGO_DOCUMENT_PROJECT_RULE,
DINGO_DOCUMENT_FILTER_RULE

);

private DingoRules() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import io.dingodb.calcite.rel.DingoGetByIndex;
import io.dingodb.calcite.rel.DingoGetByIndexMerge;
import io.dingodb.calcite.rel.DingoGetByKeys;
import io.dingodb.calcite.rel.DingoGetDocumentPreFilter;
import io.dingodb.calcite.rel.DingoGetVectorByDistance;
import io.dingodb.calcite.rel.DingoHybridSearch;
import io.dingodb.calcite.rel.DingoInfoSchemaScan;
Expand Down Expand Up @@ -504,4 +505,13 @@ public Explain visitDingoIndexScanWithRelOp(@NonNull DingoIndexScanWithRelOp rel
explain1.getChildren().add(explain);
return explain1;
}

/**
 * Explains a {@code DingoGetDocumentPreFilter} node; the access object is the
 * index table name, or an empty string when the node has no index table.
 */
@Override
public Explain visit(@NonNull DingoGetDocumentPreFilter rel) {
    final String accessObj = rel.getIndexTable() == null
        ? ""
        : rel.getIndexTable().getName();
    return getCommonExplain(rel, "DingoGetDocumentPreFilter", accessObj, "");
}
}
Loading

0 comments on commit e72f128

Please sign in to comment.