Skip to content

Commit

Permalink
Allow usage of star-tree index with null handling enabled when no null values in segment columns (#14177)
Browse files Browse the repository at this point in the history
  • Loading branch information
yashmayya authored Oct 9, 2024
1 parent 9de57b0 commit d016df3
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,17 @@
import org.apache.pinot.segment.spi.index.startree.AggregationSpec;
import org.apache.pinot.segment.spi.index.startree.StarTreeV2;
import org.apache.pinot.segment.spi.index.startree.StarTreeV2Metadata;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


@SuppressWarnings("rawtypes")
public class StarTreeUtils {
private StarTreeUtils() {
}

private static final Logger LOGGER = LoggerFactory.getLogger(StarTreeUtils.class);

/**
* Extracts the {@link AggregationFunctionColumnPair}s from the given {@link AggregationFunction}s. Returns
* {@code null} if any {@link AggregationFunction} cannot be represented as an {@link AggregationFunctionColumnPair}
Expand Down Expand Up @@ -354,7 +358,7 @@ public static BaseProjectOperator<?> createStarTreeBasedProjectOperator(IndexSeg
QueryContext queryContext, AggregationFunction[] aggregationFunctions, @Nullable FilterContext filter,
List<Pair<Predicate, PredicateEvaluator>> predicateEvaluators) {
List<StarTreeV2> starTrees = indexSegment.getStarTrees();
if (starTrees == null || queryContext.isSkipStarTree() || queryContext.isNullHandlingEnabled()) {
if (starTrees == null || queryContext.isSkipStarTree()) {
return null;
}

Expand All @@ -363,15 +367,57 @@ public static BaseProjectOperator<?> createStarTreeBasedProjectOperator(IndexSeg
if (aggregationFunctionColumnPairs == null) {
return null;
}

Map<String, List<CompositePredicateEvaluator>> predicateEvaluatorsMap =
extractPredicateEvaluatorsMap(indexSegment, filter, predicateEvaluators);
if (predicateEvaluatorsMap == null) {
return null;
}

ExpressionContext[] groupByExpressions =
queryContext.getGroupByExpressions() != null ? queryContext.getGroupByExpressions()
.toArray(new ExpressionContext[0]) : null;

if (queryContext.isNullHandlingEnabled()) {
// We can still use the star-tree index if there aren't actually any null values in this segment for all the
// metrics being aggregated, all the dimensions being filtered on / grouped by.
for (AggregationFunctionColumnPair aggregationFunctionColumnPair : aggregationFunctionColumnPairs) {
if (aggregationFunctionColumnPair == AggregationFunctionColumnPair.COUNT_STAR) {
// Null handling is irrelevant for COUNT(*)
continue;
}

String column = aggregationFunctionColumnPair.getColumn();
DataSource dataSource = indexSegment.getDataSource(column);
if (dataSource.getNullValueVector() != null && !dataSource.getNullValueVector().getNullBitmap().isEmpty()) {
LOGGER.debug("Cannot use star-tree index because aggregation column: '{}' has null values", column);
return null;
}
}

for (String column : predicateEvaluatorsMap.keySet()) {
DataSource dataSource = indexSegment.getDataSource(column);
if (dataSource.getNullValueVector() != null && !dataSource.getNullValueVector().getNullBitmap().isEmpty()) {
LOGGER.debug("Cannot use star-tree index because filter column: '{}' has null values", column);
return null;
}
}

Set<String> groupByColumns = new HashSet<>();
if (groupByExpressions != null) {
for (ExpressionContext groupByExpression : groupByExpressions) {
groupByExpression.getColumns(groupByColumns);
}
}
for (String column : groupByColumns) {
DataSource dataSource = indexSegment.getDataSource(column);
if (dataSource.getNullValueVector() != null && !dataSource.getNullValueVector().getNullBitmap().isEmpty()) {
LOGGER.debug("Cannot use star-tree index because group-by column: '{}' has null values", column);
return null;
}
}
}

List<Pair<AggregationFunction, AggregationFunctionColumnPair>> aggregations =
new ArrayList<>(aggregationFunctions.length);
for (int i = 0; i < aggregationFunctions.length; i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,8 @@ private static StarTreeIndexConfig getStarTreeIndexConfig(List<String> dimension
}
for (String metric : metrics) {
aggregationConfigs.add(
new StarTreeAggregationConfig(metric, functionType.name(), null, CompressionCodec.LZ4,
false, 4, null, null));
new StarTreeAggregationConfig(metric, functionType.name(), null, CompressionCodec.LZ4, false, 4, null,
null));
}
}
return new StarTreeIndexConfig(dimensions, null, null, aggregationConfigs, maxLeafRecords);
Expand Down Expand Up @@ -213,22 +213,35 @@ private void testStarQuery(String starQuery, boolean verifyPlan)
throws Exception {
String explain = "EXPLAIN PLAN FOR ";
String disableStarTree = "SET useStarTree = false; ";
// The star-tree index doesn't currently support null values, but we should still be able to use the star-tree index
// here since there aren't actually any null values in the dataset.
String nullHandlingEnabled = "SET enableNullHandling = true; ";

if (verifyPlan) {
JsonNode starPlan = postQuery(explain + starQuery);
JsonNode referencePlan = postQuery(disableStarTree + explain + starQuery);
JsonNode nullHandlingEnabledPlan = postQuery(nullHandlingEnabled + explain + starQuery);
assertTrue(starPlan.toString().contains(FILTER_STARTREE_INDEX) || starPlan.toString().contains("FILTER_EMPTY")
|| starPlan.toString().contains("ALL_SEGMENTS_PRUNED_ON_SERVER"),
"StarTree query did not indicate use of StarTree index in query plan. Plan: " + starPlan);
assertFalse(referencePlan.toString().contains(FILTER_STARTREE_INDEX),
"Reference query indicated use of StarTree index in query plan. Plan: " + referencePlan);
assertTrue(
nullHandlingEnabledPlan.toString().contains(FILTER_STARTREE_INDEX) || nullHandlingEnabledPlan.toString()
.contains("FILTER_EMPTY") || nullHandlingEnabledPlan.toString().contains("ALL_SEGMENTS_PRUNED_ON_SERVER"),
"StarTree query with null handling enabled did not indicate use of StarTree index in query plan. Plan: "
+ nullHandlingEnabledPlan);
}

JsonNode starResponse = postQuery(starQuery);
String referenceQuery = disableStarTree + starQuery;
JsonNode referenceResponse = postQuery(referenceQuery);
// Don't compare the actual response values since they could differ (e.g. "null" vs "Infinity" for MIN
// aggregation function with no values aggregated)
JsonNode nullHandlingEnabledResponse = postQuery(nullHandlingEnabled + starQuery);
assertEquals(starResponse.get("exceptions").size(), 0);
assertEquals(referenceResponse.get("exceptions").size(), 0);
assertEquals(nullHandlingEnabledResponse.get("exceptions").size(), 0);
assertEquals(starResponse.get("resultTable"), referenceResponse.get("resultTable"), String.format(
"Query comparison failed for: \n"
+ "Star Query: %s\nStar Response: %s\nReference Query: %s\nReference Response: %s\nRandom Seed: %d",
Expand Down

0 comments on commit d016df3

Please sign in to comment.