diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java index 749c4cf704a0..b395e5b21cd8 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java @@ -293,8 +293,9 @@ private boolean createDictionaryForColumn(ColumnIndexCreationInfo info, SegmentG FieldIndexConfigs fieldIndexConfigs = config.getIndexConfigsByColName().get(column); if (DictionaryIndexType.ignoreDictionaryOverride(config.isOptimizeDictionary(), - config.isOptimizeDictionaryForMetrics(), config.getNoDictionarySizeRatioThreshold(), spec, fieldIndexConfigs, - info.getDistinctValueCount(), info.getTotalNumberOfEntries())) { + config.isOptimizeDictionaryForMetrics(), config.getNoDictionarySizeRatioThreshold(), + config.getNoDictionaryCardinalityRatioThreshold(), spec, fieldIndexConfigs, info.getDistinctValueCount(), + info.getTotalNumberOfEntries())) { // Ignore overrides and pick from config createDictionary = info.isCreateDictionary(); } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexType.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexType.java index 9df6d24daaad..9fb6dbce87cc 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexType.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexType.java @@ -218,11 +218,12 @@ public static boolean shouldUseVarLengthDictionary(FieldSpec.DataType columnStor * This function evaluates whether to override dictionary (i.e use noDictionary) * for a column even when its explicitly configured. This evaluation is for both dimension and metric * column types. + * + * @return true if dictionary should be created, false if noDictionary should be used */ - public static boolean ignoreDictionaryOverride(boolean optimizeDictionary, - boolean optimizeDictionaryForMetrics, double noDictionarySizeRatioThreshold, - FieldSpec fieldSpec, FieldIndexConfigs fieldIndexConfigs, int cardinality, - int totalNumberOfEntries) { + public static boolean ignoreDictionaryOverride(boolean optimizeDictionary, boolean optimizeDictionaryForMetrics, + double noDictionarySizeRatioThreshold, @Nullable Double noDictionaryCardinalityRatioThreshold, + FieldSpec fieldSpec, FieldIndexConfigs fieldIndexConfigs, int cardinality, int totalNumberOfEntries) { // For an inverted index dictionary is required if (fieldIndexConfigs.getConfig(StandardIndexes.inverted()).isEnabled()) { return true; @@ -236,22 +237,38 @@ public static boolean ignoreDictionaryOverride(boolean optimizeDictionary, // Do not create dictionary if index size with dictionary is going to be larger than index size without dictionary // This is done to reduce the cost of dictionary for high cardinality columns // Off by default and needs optimizeDictionary to be set to true - if (fieldSpec.isSingleValueField() && fieldSpec.getDataType().isFixedWidth()) { - // if you can safely enable dictionary, you can ignore overrides - return canSafelyCreateDictionaryWithinThreshold(cardinality, totalNumberOfEntries, - noDictionarySizeRatioThreshold, fieldSpec); + if (fieldSpec.isSingleValueField()) { + return ignoreDictionaryOverrideForSingleValueFields(cardinality, totalNumberOfEntries, + noDictionarySizeRatioThreshold, noDictionaryCardinalityRatioThreshold, fieldSpec); } } + if (optimizeDictionaryForMetrics && !optimizeDictionary && fieldSpec.isSingleValueField() + && fieldSpec.getFieldType() == FieldSpec.FieldType.METRIC) { + return ignoreDictionaryOverrideForSingleValueFields(cardinality, totalNumberOfEntries, + noDictionarySizeRatioThreshold, noDictionaryCardinalityRatioThreshold, fieldSpec); + } + return true; + } - if (optimizeDictionaryForMetrics && !optimizeDictionary) { - if (fieldSpec.isSingleValueField() && fieldSpec.getDataType().isFixedWidth() && fieldSpec.getFieldType() - == FieldSpec.FieldType.METRIC) { + /** + * Hold common logic for ignoring dictionary override for single value fields, used for dim and metric cols + */ + private static boolean ignoreDictionaryOverrideForSingleValueFields(int cardinality, int totalNumberOfEntries, + double noDictionarySizeRatioThreshold, Double noDictionaryCardinalityRatioThreshold, FieldSpec fieldSpec) { + if (fieldSpec.isSingleValueField()) { + if (fieldSpec.getDataType().isFixedWidth()) { // if you can safely enable dictionary, you can ignore overrides return canSafelyCreateDictionaryWithinThreshold(cardinality, totalNumberOfEntries, noDictionarySizeRatioThreshold, fieldSpec); } + // Config not set, default to old behavior of create dictionary for var width cols + if (noDictionaryCardinalityRatioThreshold == null) { + return true; + } + // Variable width type, so create based simply on cardinality threshold since size cannot be calculated easily + return noDictionaryCardinalityRatioThreshold * totalNumberOfEntries > cardinality; } - return true; + return false; } /** diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java index 3b654a7ebc59..21dd9e18c250 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java @@ -303,9 +303,10 @@ Map> computeOperations(SegmentDirectory.Reader segmentRe if (existingColumnMetadata.getFieldSpec().getFieldType() != FieldSpec.FieldType.COMPLEX && DictionaryIndexType.ignoreDictionaryOverride(_tableConfig.getIndexingConfig().isOptimizeDictionary(), _tableConfig.getIndexingConfig().isOptimizeDictionaryForMetrics(), - _tableConfig.getIndexingConfig().getNoDictionarySizeRatioThreshold(), existingColumnMetadata.getFieldSpec(), - _fieldIndexConfigs.get(column), existingColumnMetadata.getCardinality(), - existingColumnMetadata.getTotalNumberOfEntries())) { + _tableConfig.getIndexingConfig().getNoDictionarySizeRatioThreshold(), + _tableConfig.getIndexingConfig().getNoDictionaryCardinalityRatioThreshold(), + existingColumnMetadata.getFieldSpec(), _fieldIndexConfigs.get(column), + existingColumnMetadata.getCardinality(), existingColumnMetadata.getTotalNumberOfEntries())) { columnOperationsMap.put(column, Collections.singletonList(Operation.ENABLE_DICTIONARY)); } } else if (existingHasDict && !newIsDict) { diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/DictionaryOptimizerCardinalityTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/DictionaryOptimizerCardinalityTest.java new file mode 100644 index 000000000000..85f57beae7ac --- /dev/null +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/DictionaryOptimizerCardinalityTest.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.pinot.segment.local.segment.creator; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import org.apache.commons.io.FileUtils; +import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader; +import org.apache.pinot.segment.local.segment.creator.impl.SegmentCreationDriverFactory; +import org.apache.pinot.segment.spi.ImmutableSegment; +import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig; +import org.apache.pinot.segment.spi.creator.SegmentIndexCreationDriver; +import org.apache.pinot.segment.spi.creator.SegmentVersion; +import org.apache.pinot.spi.config.table.FieldConfig; +import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.config.table.ingestion.IngestionConfig; +import org.apache.pinot.spi.data.DimensionFieldSpec; +import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.data.readers.FileFormat; +import org.apache.pinot.spi.utils.ReadMode; +import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + + +public class DictionaryOptimizerCardinalityTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(DictionaryOptimizerCardinalityTest.class); + private static final File INDEX_DIR = new File(DictionaryOptimizerCardinalityTest.class.toString()); + private static File _segmentDirectory; + private static File _csvFile; + + // Test cardinality based dictionary optimization for var-length data type columns + @Test + public void testDictionaryForMixedCardinalitiesStringType() + throws Exception { + + ImmutableSegment heapSegment = ImmutableSegmentLoader.load(_segmentDirectory, ReadMode.heap); + + Schema schema = heapSegment.getSegmentMetadata().getSchema(); + for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) { + // Skip virtual columns + if (fieldSpec.isVirtualColumn()) { + continue; + } + + String columnName = fieldSpec.getName(); + if ("low_cardinality_strings".equals(columnName)) { + Assert.assertTrue(heapSegment.getForwardIndex(columnName).isDictionaryEncoded(), + "Low cardinality columns should be dictionary encoded"); + } + + if ("high_cardinality_strings".equals(columnName)) { + Assert.assertFalse(heapSegment.getForwardIndex(columnName).isDictionaryEncoded(), + "High cardinality columns should be raw encoded"); + } + } + } + + @BeforeClass + private void setup() + throws Exception { + + if (INDEX_DIR.exists()) { + FileUtils.deleteQuietly(INDEX_DIR); + } + INDEX_DIR.mkdirs(); + _csvFile = new File(INDEX_DIR, "data.csv"); + generateCsv(_csvFile, 500); + + IngestionConfig ingestionConfig = new IngestionConfig(); + ingestionConfig.setRowTimeValueCheck(false); + ingestionConfig.setSegmentTimeValueCheck(false); + Schema schema = + new Schema.SchemaBuilder().addSingleValueDimension("low_cardinality_strings", FieldSpec.DataType.STRING) + .addSingleValueDimension("high_cardinality_strings", FieldSpec.DataType.STRING) + .addDateTimeField("ts", FieldSpec.DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS").build(); + List stringColumns = + schema.getDimensionFieldSpecs().stream().filter(x -> x.getDataType() == FieldSpec.DataType.STRING).collect( + Collectors.toList()); + + List fieldConfigList = stringColumns.stream().map( + x -> new FieldConfig(x.getName(), FieldConfig.EncodingType.DICTIONARY, Collections.emptyList(), null, null)) + .collect(Collectors.toList()); + + SegmentGeneratorConfig segmentGenSpec = + new SegmentGeneratorConfig(new TableConfigBuilder(TableType.OFFLINE).setTableName("tableName") + .setIngestionConfig(ingestionConfig).setFieldConfigList(fieldConfigList).build(), + schema); + + segmentGenSpec.setInputFilePath(_csvFile.getAbsolutePath()); + segmentGenSpec.setTimeColumnName("ts"); + segmentGenSpec.setSegmentTimeUnit(TimeUnit.SECONDS); + segmentGenSpec.setFormat(FileFormat.CSV); + segmentGenSpec.setSegmentVersion(SegmentVersion.v1); + segmentGenSpec.setTableName("tableName"); + segmentGenSpec.setOutDir(INDEX_DIR.getAbsolutePath()); + segmentGenSpec.setOptimizeDictionary(true); + segmentGenSpec.setNoDictionaryCardinalityRatioThreshold(0.1); // cardinality must be <10% of total docs to override + + SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null); + driver.init(segmentGenSpec); + driver.build(); + + _segmentDirectory = new File(INDEX_DIR, driver.getSegmentName()); + } + + // Generate a 3 columns csv file, sample format is: + // low_cardinality_strings,high_cardinality_strings,ts + // Red,kdeejdfnsd,1600000000 + private void generateCsv(File file, int numberOfRows) throws IOException { + String[] lowCardinalityOptions = {"Red", "Blue", "Green", "Yellow", "Purple"}; + String alphabet = "abcdefghijklmnopqrstuvwxyz"; + Random random = new Random(42); + + try (FileWriter writer = new FileWriter(file, false)) { + // Write the header + writer.append("low_cardinality_strings,high_cardinality_strings,ts\n"); + + long startTimestamp = System.currentTimeMillis() / 1000; + for (int i = 0; i < numberOfRows; i++) { + String lowCardinality = lowCardinalityOptions[random.nextInt(lowCardinalityOptions.length)]; + StringBuilder highCardinality = new StringBuilder(10); + for (int j = 0; j < 10; j++) { + highCardinality.append(alphabet.charAt(random.nextInt(alphabet.length()))); + } + long timestamp = startTimestamp + (i / 10); + writer.append(String.format("%s,%s,%d\n", lowCardinality, highCardinality, timestamp)); + } + } + } + + @AfterClass + public void cleanup() { + FileUtils.deleteQuietly(_csvFile); + FileUtils.deleteQuietly(_segmentDirectory); + FileUtils.deleteQuietly(INDEX_DIR); + } +} diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexTypeTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexTypeTest.java index ec82432ab14d..83af69e260d5 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexTypeTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexTypeTest.java @@ -27,15 +27,16 @@ import org.apache.pinot.segment.local.segment.index.AbstractSerdeIndexContract; import org.apache.pinot.segment.spi.index.DictionaryIndexConfig; import org.apache.pinot.segment.spi.index.FieldIndexConfigs; +import org.apache.pinot.segment.spi.index.IndexType; import org.apache.pinot.segment.spi.index.StandardIndexes; import org.apache.pinot.spi.config.table.FieldConfig; import org.apache.pinot.spi.config.table.IndexConfig; import org.apache.pinot.spi.config.table.Intern; -import org.apache.pinot.spi.config.table.JsonIndexConfig; import org.apache.pinot.spi.data.DimensionFieldSpec; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.data.MetricFieldSpec; import org.apache.pinot.spi.utils.JsonUtils; +import org.mockito.Mockito; import org.testng.Assert; import org.testng.annotations.Test; @@ -349,28 +350,60 @@ public void testStandardIndex() { + "the DictionaryIndexType static instance"); } + /** + * Tests to verify various combinations of inputs to test dictionary override optimization. + */ @Test public void testDictionaryOverride() { MetricFieldSpec metric = new MetricFieldSpec("testCol", FieldSpec.DataType.DOUBLE); - FieldIndexConfigs fieldIndexConfigs = new FieldIndexConfigs.Builder().build(); + IndexType index1 = Mockito.mock(IndexType.class); + Mockito.when(index1.getId()).thenReturn("index1"); + IndexConfig indexConf = new IndexConfig(true); + FieldIndexConfigs fieldIndexConfigs = new FieldIndexConfigs.Builder().add(index1, indexConf).build(); // No need to disable dictionary - assertTrue(DictionaryIndexType.ignoreDictionaryOverride(false, true, 2, metric, fieldIndexConfigs, 5, 20)); + boolean result = + DictionaryIndexType.ignoreDictionaryOverride(false, true, 2, null, metric, fieldIndexConfigs, 5, 20); + assertTrue(result); // Set a higher noDictionarySizeRatioThreshold - assertFalse(DictionaryIndexType.ignoreDictionaryOverride(false, true, 5, metric, fieldIndexConfigs, 5, 20)); + result = DictionaryIndexType.ignoreDictionaryOverride(false, true, 5, null, metric, fieldIndexConfigs, 5, 20); + assertFalse(result); // optimizeDictionary and optimizeDictionaryForMetrics both turned on - assertFalse(DictionaryIndexType.ignoreDictionaryOverride(true, true, 5, metric, fieldIndexConfigs, 5, 20)); + result = DictionaryIndexType.ignoreDictionaryOverride(true, true, 5, null, metric, fieldIndexConfigs, 5, 20); + assertFalse(result); + + // noDictionarySizeRatioThreshold and noDictionaryCardinalityThreshold are provided + result = DictionaryIndexType.ignoreDictionaryOverride(true, true, 5, 0.10, metric, fieldIndexConfigs, 5, 100); + assertTrue(result); + + // cardinality is much less than total docs, use dictionary + metric.setDataType(FieldSpec.DataType.STRING); + result = DictionaryIndexType.ignoreDictionaryOverride(true, true, 5, 0.10, metric, fieldIndexConfigs, 5, 100); + assertTrue(result); + + // cardinality is large % of total docs, do not use dictionary + result = DictionaryIndexType.ignoreDictionaryOverride(true, true, 5, 0.10, metric, fieldIndexConfigs, 5, 20); + assertFalse(result); + + // Test Dimension col + // Don't ignore for Json. We want to disable dictionary for json. + DimensionFieldSpec dimension = new DimensionFieldSpec("testCol", FieldSpec.DataType.JSON, true); + result = DictionaryIndexType.ignoreDictionaryOverride(true, true, 5, null, dimension, fieldIndexConfigs, 5, 20); + assertTrue(result); + + // cardinality is much less than total docs, use dictionary + dimension.setDataType(FieldSpec.DataType.STRING); + result = DictionaryIndexType.ignoreDictionaryOverride(true, false, 5, 0.10, dimension, fieldIndexConfigs, 5, 100); + assertTrue(result); + + // cardinality is large % of total docs, do not use dictionary + result = DictionaryIndexType.ignoreDictionaryOverride(true, false, 5, 0.10, dimension, fieldIndexConfigs, 5, 20); + assertFalse(result); // Ignore for inverted index IndexConfig indexConfig = new IndexConfig(false); fieldIndexConfigs = new FieldIndexConfigs.Builder().add(StandardIndexes.inverted(), indexConfig).build(); - assertTrue(DictionaryIndexType.ignoreDictionaryOverride(true, true, 5, metric, fieldIndexConfigs, 5, 20)); - - // Don't ignore for JSON index - DimensionFieldSpec dimension = new DimensionFieldSpec("testCol", FieldSpec.DataType.JSON, true); - JsonIndexConfig jsonIndexConfig = new JsonIndexConfig(); - fieldIndexConfigs = new FieldIndexConfigs.Builder().add(StandardIndexes.json(), jsonIndexConfig).build(); - assertFalse(DictionaryIndexType.ignoreDictionaryOverride(true, true, 5, dimension, fieldIndexConfigs, 5, 20)); + assertTrue(DictionaryIndexType.ignoreDictionaryOverride(true, true, 5, null, metric, fieldIndexConfigs, 5, 20)); } } diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java index 4424be0883e6..f5065d417f4a 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java @@ -127,6 +127,7 @@ public enum TimeColumnType { private boolean _optimizeDictionary = false; private boolean _optimizeDictionaryForMetrics = false; private double _noDictionarySizeRatioThreshold = IndexingConfig.DEFAULT_NO_DICTIONARY_SIZE_RATIO_THRESHOLD; + private Double _noDictionaryCardinalityRatioThreshold; private boolean _realtimeConversion = false; // consumerDir contains data from the consuming segment, and is used during _realtimeConversion optimization private File _consumerDir; @@ -208,6 +209,7 @@ public SegmentGeneratorConfig(TableConfig tableConfig, Schema schema) { _optimizeDictionary = indexingConfig.isOptimizeDictionary(); _optimizeDictionaryForMetrics = indexingConfig.isOptimizeDictionaryForMetrics(); _noDictionarySizeRatioThreshold = indexingConfig.getNoDictionarySizeRatioThreshold(); + _noDictionaryCardinalityRatioThreshold = indexingConfig.getNoDictionaryCardinalityRatioThreshold(); } IngestionConfig ingestionConfig = tableConfig.getIngestionConfig(); @@ -805,6 +807,16 @@ public void setNoDictionarySizeRatioThreshold(double noDictionarySizeRatioThresh _noDictionarySizeRatioThreshold = noDictionarySizeRatioThreshold; } + @Nullable + public Double getNoDictionaryCardinalityRatioThreshold() { + return _noDictionaryCardinalityRatioThreshold; + } + + public void setNoDictionaryCardinalityRatioThreshold(@Nullable Double noDictionaryCardinalityRatioThreshold) { + _noDictionaryCardinalityRatioThreshold = noDictionaryCardinalityRatioThreshold; + } + + public boolean isFailOnEmptySegment() { return _failOnEmptySegment; } diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java index 1cfd6107107c..369f27767507 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java @@ -27,7 +27,7 @@ public class IndexingConfig extends BaseJsonConfig { - // Default ratio for overriding dictionary + // Default ratio for overriding dictionary for fixed width columns public static final double DEFAULT_NO_DICTIONARY_SIZE_RATIO_THRESHOLD = 0.85d; /** @@ -78,6 +78,10 @@ public class IndexingConfig extends BaseJsonConfig { private double _noDictionarySizeRatioThreshold = DEFAULT_NO_DICTIONARY_SIZE_RATIO_THRESHOLD; + // Used in conjunction with `optimizeDictionary`, if cardinality / total docs is less than the threshold, + // then create a dictionary for the column. A value around 0.1 (10%) is a reasonable starting point + private Double _noDictionaryCardinalityRatioThreshold; + // TODO: Add a new configuration related to the segment generation private boolean _autoGeneratedInvertedIndex; private boolean _createInvertedIndexDuringSegmentGeneration; @@ -372,6 +376,15 @@ public void setNoDictionarySizeRatioThreshold(double noDictionarySizeRatioThresh _noDictionarySizeRatioThreshold = noDictionarySizeRatioThreshold; } + @Nullable + public Double getNoDictionaryCardinalityRatioThreshold() { + return _noDictionaryCardinalityRatioThreshold; + } + + public void setNoDictionaryCardinalityRatioThreshold(Double noDictionaryCardinalityRatioThreshold) { + _noDictionaryCardinalityRatioThreshold = noDictionaryCardinalityRatioThreshold; + } + public String getSegmentNameGeneratorType() { return _segmentNameGeneratorType; } diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/config/table/IndexingConfigTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/config/table/IndexingConfigTest.java index f97e9b30b1a1..d3b9b8fc364a 100644 --- a/pinot-spi/src/test/java/org/apache/pinot/spi/config/table/IndexingConfigTest.java +++ b/pinot-spi/src/test/java/org/apache/pinot/spi/config/table/IndexingConfigTest.java @@ -18,6 +18,7 @@ */ package org.apache.pinot.spi.config.table; +import com.fasterxml.jackson.core.JsonProcessingException; import java.io.IOException; import java.util.Arrays; import java.util.HashMap; @@ -27,6 +28,7 @@ import org.testng.annotations.Test; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNull; import static org.testng.Assert.assertTrue; @@ -97,4 +99,27 @@ public void testSegmentPartitionConfig() assertEquals(actualPartitionConfig.getNumPartitions(column), expectedPartitionConfig.getNumPartitions(column)); } } + + @Test + public void testOptimizeDictionaryConfigs() + throws JsonProcessingException { + String indexingConfigStr = "{" + + "\"optimizeDictionary\": true," + + "\"optimizeDictionaryForMetrics\": true," + + "\"noDictionarySizeRatioThreshold\": 0.50" + + "}"; + IndexingConfig indexingConfig = JsonUtils.stringToObject(indexingConfigStr, IndexingConfig.class); + assertTrue(indexingConfig.isOptimizeDictionary()); + assertTrue(indexingConfig.isOptimizeDictionaryForMetrics()); + assertEquals(indexingConfig.getNoDictionarySizeRatioThreshold(), 0.50d); + assertNull(indexingConfig.getNoDictionaryCardinalityRatioThreshold()); + + indexingConfigStr = "{" + + "\"optimizeDictionary\": true," + + "\"noDictionaryCardinalityRatioThreshold\": 0.07" + + "}"; + indexingConfig = JsonUtils.stringToObject(indexingConfigStr, IndexingConfig.class); + assertTrue(indexingConfig.isOptimizeDictionary()); + assertEquals(indexingConfig.getNoDictionaryCardinalityRatioThreshold(), 0.07d); + } }