From c7a116d1503e70900ded91c6d01c7f872087d4e3 Mon Sep 17 00:00:00 2001 From: Yash Mayya Date: Thu, 11 Jul 2024 18:42:01 +0530 Subject: [PATCH] Fill derived column with the default null value on transform function errors --- .../function/GroovyFunctionEvaluator.java | 7 +++ .../function/InbuiltFunctionEvaluator.java | 7 +++ .../BaseDefaultColumnHandler.java | 35 ++++++++++++-- .../index/loader/SegmentPreProcessorTest.java | 47 ++++++++++++++----- .../resources/data/newColumnsSchema1.json | 8 +++- 5 files changed, 88 insertions(+), 16 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/function/GroovyFunctionEvaluator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/function/GroovyFunctionEvaluator.java index 8904f3ffded9..52f36465bb16 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/function/GroovyFunctionEvaluator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/function/GroovyFunctionEvaluator.java @@ -58,8 +58,10 @@ public class GroovyFunctionEvaluator implements FunctionEvaluator { private final int _numArguments; private final Binding _binding; private final Script _script; + private final String _expression; public GroovyFunctionEvaluator(String closure) { + _expression = closure; Matcher matcher = GROOVY_FUNCTION_PATTERN.matcher(closure); Preconditions.checkState(matcher.matches(), "Invalid transform expression: %s", closure); String arguments = matcher.group(ARGUMENTS_GROUP_NAME); @@ -110,4 +112,9 @@ public Object evaluate(Object[] values) { } return _script.run(); } + + @Override + public String toString() { + return _expression; + } } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/function/InbuiltFunctionEvaluator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/function/InbuiltFunctionEvaluator.java index 7b28ce7850f8..d0d85e6d8c1d 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/function/InbuiltFunctionEvaluator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/function/InbuiltFunctionEvaluator.java @@ -46,8 +46,10 @@ public class InbuiltFunctionEvaluator implements FunctionEvaluator { // Root of the execution tree private final ExecutableNode _rootNode; private final List _arguments; + private final String _functionExpression; public InbuiltFunctionEvaluator(String functionExpression) { + _functionExpression = functionExpression; _arguments = new ArrayList<>(); _rootNode = planExecution(RequestContextUtils.getExpression(functionExpression)); } @@ -118,6 +120,11 @@ public Object evaluate(Object[] values) { return _rootNode.execute(values); } + @Override + public String toString() { + return _functionExpression; + } + private interface ExecutableNode { Object execute(GenericRow row); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java index 43a7c6c385ee..c79195734d2a 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.math.BigDecimal; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -393,7 +394,7 @@ protected boolean createColumnV1Indices(String column) } try { - createDerivedColumnV1Indices(column, functionEvaluator, argumentsMetadata); + createDerivedColumnV1Indices(column, functionEvaluator, argumentsMetadata, errorOnFailure); return true; } catch (Exception e) { LOGGER.error("Caught exception while creating derived column: {} with transform function: {}", column, @@ -556,7 +557,7 @@ private boolean isNullable(FieldSpec fieldSpec) { * - Support forward index disabled derived column */ private void createDerivedColumnV1Indices(String column, FunctionEvaluator functionEvaluator, - List argumentsMetadata) + List argumentsMetadata, boolean errorOnFailure) throws Exception { // Initialize value readers for all arguments int numArguments = argumentsMetadata.size(); @@ -570,6 +571,12 @@ private void createDerivedColumnV1Indices(String column, FunctionEvaluator funct if (isNullable(fieldSpec)) { nullValueVectorCreator = new NullValueVectorCreator(_indexDir, fieldSpec.getName()); } + + // Just log the first function evaluation error + int functionEvaluateErrorCount = 0; + Exception functionEvalError = null; + Object[] inputValuesWithError = null; + try { // Calculate the values for the derived column Object[] inputValues = new Object[numArguments]; @@ -580,7 +587,23 @@ private void createDerivedColumnV1Indices(String column, FunctionEvaluator funct for (int j = 0; j < numArguments; j++) { inputValues[j] = valueReaders.get(j).getValue(i); } - Object outputValue = functionEvaluator.evaluate(inputValues); + + Object outputValue = null; + try { + outputValue = functionEvaluator.evaluate(inputValues); + } catch (Exception e) { + if (!errorOnFailure) { + LOGGER.debug("Encountered an exception while evaluating function {} for derived column {} with " + + "arguments: {}", functionEvaluator, column, Arrays.toString(inputValues), e); + functionEvaluateErrorCount++; + if (functionEvalError == null) { + functionEvalError = e; + inputValuesWithError = Arrays.copyOf(inputValues, inputValues.length); + } + } else { + throw e; + } + } if (outputValue == null) { outputValue = fieldSpec.getDefaultNullValue(); @@ -597,6 +620,12 @@ private void createDerivedColumnV1Indices(String column, FunctionEvaluator funct outputValues[i] = outputValue; } + if (functionEvaluateErrorCount > 0) { + LOGGER.warn("Caught {} exceptions while evaluating derived column: {} with function: {}. The first input value " + + "tuple that led to an error is: {}", functionEvaluateErrorCount, column, functionEvaluator, + Arrays.toString(inputValuesWithError), functionEvalError); + } + if (nullValueVectorCreator != null) { nullValueVectorCreator.seal(); } diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java index c53952ebd550..6e46127ff334 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java @@ -143,7 +143,8 @@ public class SegmentPreProcessorTest { private static final String NEW_INT_SV_DIMENSION_COLUMN_NAME = "newIntSVDimension"; private static final String NEW_STRING_MV_DIMENSION_COLUMN_NAME = "newStringMVDimension"; private static final String NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME = "newRawStringSVDimension"; - private static final String NEW_NULLABLE_STRING_SV_DIMENSION_COLUMN_NAME = "newNullableStringSVDimension"; + private static final String NEW_NULL_RETURN_STRING_SV_DIMENSION_COLUMN_NAME = "newNullReturnStringSVDimension"; + private static final String NEW_WRONG_ARG_DATE_TRUNC_DERIVED_COLUMN_NAME = "newWrongArgDateTruncDerivedColumn"; private static final String NEW_HLL_BYTE_METRIC_COLUMN_NAME = "newHLLByteMetric"; private static final String NEW_TDIGEST_BYTE_METRIC_COLUMN_NAME = "newTDigestByteMetric"; @@ -1107,13 +1108,17 @@ public void testV1UpdateDefaultColumns() ImmutableList.of( new TransformConfig(NEW_INT_SV_DIMENSION_COLUMN_NAME, "plus(column1, 1)"), new TransformConfig(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME, "reverse(column3)"), - // Ensure that null values for derived columns are handled appropriately during segment reload - new TransformConfig(NEW_NULLABLE_STRING_SV_DIMENSION_COLUMN_NAME, - "json_path_string(column21, 'non-existent-path', null)") + // Ensure that null values returned by transform functions for derived columns are handled appropriately + // during segment reload + new TransformConfig(NEW_NULL_RETURN_STRING_SV_DIMENSION_COLUMN_NAME, + "json_path_string(column21, 'non-existent-path', null)"), + // Ensure that any transform function failures result in a null value if error on failure is false + new TransformConfig(NEW_WRONG_ARG_DATE_TRUNC_DERIVED_COLUMN_NAME, "dateTrunc('abcd', column1)") )); _tableConfig.setIngestionConfig(ingestionConfig); _indexLoadingConfig.addInvertedIndexColumns(NEW_COLUMN_INVERTED_INDEX); _indexLoadingConfig.addNoDictionaryColumns(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME); + _indexLoadingConfig.setErrorOnColumnBuildFailure(false); checkUpdateDefaultColumns(); // Try to use the third schema and update default value again. @@ -1161,13 +1166,17 @@ public void testV3UpdateDefaultColumns() ImmutableList.of( new TransformConfig(NEW_INT_SV_DIMENSION_COLUMN_NAME, "plus(column1, 1)"), new TransformConfig(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME, "reverse(column3)"), - // Ensure that null values for derived columns are handled appropriately during segment reload - new TransformConfig(NEW_NULLABLE_STRING_SV_DIMENSION_COLUMN_NAME, - "json_path_string(column21, 'non-existent-path', null)") + // Ensure that null values returned by transform functions for derived columns are handled appropriately + // during segment reload + new TransformConfig(NEW_NULL_RETURN_STRING_SV_DIMENSION_COLUMN_NAME, + "json_path_string(column21, 'non-existent-path', null)"), + // Ensure that any transform function failures result in a null value if error on failure is false + new TransformConfig(NEW_WRONG_ARG_DATE_TRUNC_DERIVED_COLUMN_NAME, "dateTrunc('abcd', column1)") )); _tableConfig.setIngestionConfig(ingestionConfig); _indexLoadingConfig.addInvertedIndexColumns(NEW_COLUMN_INVERTED_INDEX); _indexLoadingConfig.addNoDictionaryColumns(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME); + _indexLoadingConfig.setErrorOnColumnBuildFailure(false); checkUpdateDefaultColumns(); // Try to use the third schema and update default value again. @@ -1283,9 +1292,19 @@ private void checkUpdateDefaultColumns() assertEquals(columnMetadata.getBitsPerElement(), originalColumnMetadata.getBitsPerElement()); assertEquals(columnMetadata.getTotalNumberOfEntries(), originalColumnMetadata.getTotalNumberOfEntries()); - columnMetadata = segmentMetadata.getColumnMetadataFor(NEW_NULLABLE_STRING_SV_DIMENSION_COLUMN_NAME); + columnMetadata = segmentMetadata.getColumnMetadataFor(NEW_NULL_RETURN_STRING_SV_DIMENSION_COLUMN_NAME); // All the values should be the default null value assertEquals(columnMetadata.getCardinality(), 1); + assertTrue(columnMetadata.isAutoGenerated()); + assertEquals(columnMetadata.getMinValue(), "nil"); + assertEquals(columnMetadata.getMaxValue(), "nil"); + + columnMetadata = segmentMetadata.getColumnMetadataFor(NEW_WRONG_ARG_DATE_TRUNC_DERIVED_COLUMN_NAME); + // All the values should be the default null value + assertEquals(columnMetadata.getCardinality(), 1); + assertTrue(columnMetadata.isAutoGenerated()); + assertEquals(columnMetadata.getMinValue(), Long.MIN_VALUE); + assertEquals(columnMetadata.getMaxValue(), Long.MIN_VALUE); // Check dictionary and forward index exist. try (SegmentDirectory segmentDirectory = SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader() @@ -1309,10 +1328,16 @@ private void checkUpdateDefaultColumns() // Dictionary shouldn't be created for raw derived column assertFalse(reader.hasIndexFor(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME, StandardIndexes.dictionary())); assertTrue(reader.hasIndexFor(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME, StandardIndexes.forward())); + // Null vector index should be created for derived column with null values - assertTrue(reader.hasIndexFor(NEW_NULLABLE_STRING_SV_DIMENSION_COLUMN_NAME, StandardIndexes.nullValueVector())); - assertTrue(reader.hasIndexFor(NEW_NULLABLE_STRING_SV_DIMENSION_COLUMN_NAME, StandardIndexes.forward())); - assertTrue(reader.hasIndexFor(NEW_NULLABLE_STRING_SV_DIMENSION_COLUMN_NAME, StandardIndexes.dictionary())); + assertTrue(reader.hasIndexFor(NEW_NULL_RETURN_STRING_SV_DIMENSION_COLUMN_NAME, + StandardIndexes.nullValueVector())); + assertTrue(reader.hasIndexFor(NEW_NULL_RETURN_STRING_SV_DIMENSION_COLUMN_NAME, StandardIndexes.forward())); + assertTrue(reader.hasIndexFor(NEW_NULL_RETURN_STRING_SV_DIMENSION_COLUMN_NAME, StandardIndexes.dictionary())); + assertTrue(reader.hasIndexFor(NEW_WRONG_ARG_DATE_TRUNC_DERIVED_COLUMN_NAME, + StandardIndexes.nullValueVector())); + assertTrue(reader.hasIndexFor(NEW_WRONG_ARG_DATE_TRUNC_DERIVED_COLUMN_NAME, StandardIndexes.forward())); + assertTrue(reader.hasIndexFor(NEW_WRONG_ARG_DATE_TRUNC_DERIVED_COLUMN_NAME, StandardIndexes.dictionary())); assertTrue(reader.hasIndexFor(NEW_INT_METRIC_COLUMN_NAME, StandardIndexes.nullValueVector())); assertTrue(reader.hasIndexFor(NEW_LONG_METRIC_COLUMN_NAME, StandardIndexes.nullValueVector())); diff --git a/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json b/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json index 82552ab4a3ca..b982b83c8150 100644 --- a/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json +++ b/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json @@ -39,8 +39,12 @@ }, { "dataType": "STRING", - "name": "newNullableStringSVDimension", - "defaultNullValue": "null" + "name": "newNullReturnStringSVDimension", + "defaultNullValue": "nil" + }, + { + "dataType": "LONG", + "name": "newWrongArgDateTruncDerivedColumn" } ] }