Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle null values appropriately during segment reload for new derived columns #13212

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1485,6 +1485,7 @@ public void testStarTreeTriggering()
* <li>"NewAddedRawDerivedStringDimension", DIMENSION, STRING, single-value, reverse(DestCityName)</li>
* <li>"NewAddedRawDerivedMVIntDimension", DIMENSION, INT, multi-value, array(ActualElapsedTime)</li>
* <li>"NewAddedDerivedMVDoubleDimension", DIMENSION, DOUBLE, multi-value, array(ArrDelayMinutes)</li>
* <li>"NewAddedDerivedNullString", DIMENSION, STRING, single-value, caseWhen(true, null, null)</li>
* </ul>
*/
@Test(dependsOnMethods = "testAggregateMetadataAPI", dataProvider = "useBothQueryEngines")
Expand All @@ -1496,7 +1497,7 @@ public void testDefaultColumns(boolean useMultiStageQueryEngine)
reloadWithExtraColumns();
JsonNode queryResponse = postQuery(SELECT_STAR_QUERY);
assertEquals(queryResponse.get("totalDocs").asLong(), numTotalDocs);
assertEquals(queryResponse.get("resultTable").get("dataSchema").get("columnNames").size(), 103);
assertEquals(queryResponse.get("resultTable").get("dataSchema").get("columnNames").size(), 104);

testNewAddedColumns();

Expand Down Expand Up @@ -1581,6 +1582,8 @@ private void reloadWithExtraColumns()
schema.addField(new DimensionFieldSpec("NewAddedRawDerivedStringDimension", DataType.STRING, true));
schema.addField(new DimensionFieldSpec("NewAddedRawDerivedMVIntDimension", DataType.INT, false));
schema.addField(new DimensionFieldSpec("NewAddedDerivedMVDoubleDimension", DataType.DOUBLE, false));
schema.addField(new DimensionFieldSpec("NewAddedDerivedNullString", DataType.STRING, true, "nil"));
schema.setEnableColumnBasedNullHandling(true);
addSchema(schema);

TableConfig tableConfig = getOfflineTableConfig();
Expand All @@ -1593,7 +1596,8 @@ private void reloadWithExtraColumns()
new TransformConfig("NewAddedDerivedDivAirportSeqIDsString", "DivAirportSeqIDs"),
new TransformConfig("NewAddedRawDerivedStringDimension", "reverse(DestCityName)"),
new TransformConfig("NewAddedRawDerivedMVIntDimension", "array(ActualElapsedTime)"),
new TransformConfig("NewAddedDerivedMVDoubleDimension", "array(ArrDelayMinutes)"));
new TransformConfig("NewAddedDerivedMVDoubleDimension", "array(ArrDelayMinutes)"),
new TransformConfig("NewAddedDerivedNullString", "caseWhen(true, null, null)"));
IngestionConfig ingestionConfig = new IngestionConfig();
ingestionConfig.setTransformConfigs(transformConfigs);
tableConfig.setIngestionConfig(ingestionConfig);
Expand All @@ -1618,30 +1622,38 @@ private void reloadWithExtraColumns()
JsonNode columnIndexSizeMap = JsonUtils.stringToJsonNode(sendGetRequest(
getControllerBaseApiUrl() + "/tables/mytable/metadata?columns=DivAirportSeqIDs"
+ "&columns=NewAddedDerivedDivAirportSeqIDs&columns=NewAddedDerivedDivAirportSeqIDsString"
+ "&columns=NewAddedRawDerivedStringDimension&columns=NewAddedRawDerivedMVIntDimension"))
+ "&columns=NewAddedRawDerivedStringDimension&columns=NewAddedRawDerivedMVIntDimension"
+ "&columns=NewAddedDerivedNullString"))
.get("columnIndexSizeMap");
assertEquals(columnIndexSizeMap.size(), 5);
assertEquals(columnIndexSizeMap.size(), 6);
JsonNode originalColumnIndexSizes = columnIndexSizeMap.get("DivAirportSeqIDs");
JsonNode derivedColumnIndexSizes = columnIndexSizeMap.get("NewAddedDerivedDivAirportSeqIDs");
JsonNode derivedStringColumnIndexSizes = columnIndexSizeMap.get("NewAddedDerivedDivAirportSeqIDsString");
JsonNode derivedRawStringColumnIndex = columnIndexSizeMap.get("NewAddedRawDerivedStringDimension");
JsonNode derivedRawMVIntColumnIndex = columnIndexSizeMap.get("NewAddedRawDerivedMVIntDimension");
JsonNode derivedNullStringColumnIndex = columnIndexSizeMap.get("NewAddedDerivedNullString");

// Derived int column should have the same dictionary size as the original column
double originalColumnDictionarySize = originalColumnIndexSizes.get("dictionary").asDouble();
assertEquals(derivedColumnIndexSizes.get("dictionary").asDouble(), originalColumnDictionarySize);
double originalColumnDictionarySize = originalColumnIndexSizes.get(StandardIndexes.DICTIONARY_ID).asDouble();
assertEquals(derivedColumnIndexSizes.get(StandardIndexes.DICTIONARY_ID).asDouble(), originalColumnDictionarySize);

// Derived string column should have larger dictionary size than the original column
assertTrue(derivedStringColumnIndexSizes.get("dictionary").asDouble() > originalColumnDictionarySize);
assertTrue(
derivedStringColumnIndexSizes.get(StandardIndexes.DICTIONARY_ID).asDouble() > originalColumnDictionarySize);

// Both derived columns should have smaller forward index size than the original column because of compression
double derivedColumnForwardIndexSize = derivedColumnIndexSizes.get("forward_index").asDouble();
assertTrue(derivedColumnForwardIndexSize < originalColumnIndexSizes.get("forward_index").asDouble());
assertEquals(derivedStringColumnIndexSizes.get("forward_index").asDouble(), derivedColumnForwardIndexSize);
double derivedColumnForwardIndexSize = derivedColumnIndexSizes.get(StandardIndexes.FORWARD_ID).asDouble();
assertTrue(derivedColumnForwardIndexSize < originalColumnIndexSizes.get(StandardIndexes.FORWARD_ID).asDouble());
assertEquals(derivedStringColumnIndexSizes.get(StandardIndexes.FORWARD_ID).asDouble(),
derivedColumnForwardIndexSize);

assertTrue(derivedRawStringColumnIndex.has(StandardIndexes.FORWARD_ID));
assertFalse(derivedRawStringColumnIndex.has(StandardIndexes.DICTIONARY_ID));

assertTrue(derivedRawStringColumnIndex.has("forward_index"));
assertFalse(derivedRawStringColumnIndex.has("dictionary"));
assertTrue(derivedRawMVIntColumnIndex.has(StandardIndexes.FORWARD_ID));
assertFalse(derivedRawMVIntColumnIndex.has(StandardIndexes.DICTIONARY_ID));

assertTrue(derivedRawMVIntColumnIndex.has("forward_index"));
assertFalse(derivedRawMVIntColumnIndex.has("dictionary"));
assertTrue(derivedNullStringColumnIndex.has(StandardIndexes.NULL_VALUE_VECTOR_ID));
}

private void reloadWithMissingColumns()
Expand Down Expand Up @@ -1753,6 +1765,9 @@ private void testNewAddedColumns()
+ (useMultiStageQueryEngine() ? "X''" : "''");
testQuery(pinotQuery, h2Query);

pinotQuery = "SELECT COUNT(*) FROM mytable WHERE NewAddedDerivedNullString = 'nil'";
testQuery(pinotQuery, h2Query);

pinotQuery = "SELECT COUNT(*) FROM mytable WHERE NewAddedDerivedHoursSinceEpoch = 392232";
h2Query = "SELECT COUNT(*) FROM mytable WHERE DaysSinceEpoch = 16343";
testQuery(pinotQuery, h2Query);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,10 @@ public class GroovyFunctionEvaluator implements FunctionEvaluator {
private final int _numArguments;
private final Binding _binding;
private final Script _script;
private final String _expression;

public GroovyFunctionEvaluator(String closure) {
_expression = closure;
Matcher matcher = GROOVY_FUNCTION_PATTERN.matcher(closure);
Preconditions.checkState(matcher.matches(), "Invalid transform expression: %s", closure);
String arguments = matcher.group(ARGUMENTS_GROUP_NAME);
Expand Down Expand Up @@ -110,4 +112,9 @@ public Object evaluate(Object[] values) {
}
return _script.run();
}

@Override
public String toString() {
return _expression;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,10 @@ public class InbuiltFunctionEvaluator implements FunctionEvaluator {
// Root of the execution tree
private final ExecutableNode _rootNode;
private final List<String> _arguments;
private final String _functionExpression;

public InbuiltFunctionEvaluator(String functionExpression) {
_functionExpression = functionExpression;
_arguments = new ArrayList<>();
_rootNode = planExecution(RequestContextUtils.getExpression(functionExpression));
}
Expand Down Expand Up @@ -118,6 +120,11 @@ public Object evaluate(Object[] values) {
return _rootNode.execute(values);
}

@Override
public String toString() {
return _functionExpression;
}

private interface ExecutableNode {

Object execute(GenericRow row);
Expand Down
Loading
Loading