Support map type
ege-st authored and xiangfu0 committed Jun 21, 2024
1 parent 74e1a14 commit 591e9d9
Showing 59 changed files with 6,056 additions and 32 deletions.
@@ -285,6 +285,12 @@ public enum TransformFunctionType {

ARRAY_VALUE_CONSTRUCTOR("arrayValueConstructor", "array_value_constructor"),

// MAP Functions
ITEM("item",
ReturnTypes.cascade(opBinding -> opBinding.getOperandType(0).getComponentType(),
SqlTypeTransforms.FORCE_NULLABLE),
OperandTypes.family(ImmutableList.of(SqlTypeFamily.MAP, SqlTypeFamily.STRING))),

// Trigonometry
SIN("sin"),
COS("cos"),
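The ITEM entry above declares, via Calcite, that item(map, key) accepts a (MAP, STRING) operand pair and returns the map's value (component) type, forced nullable. A minimal Java analogue of those semantics (illustrative only, not Calcite or Pinot API; all names below are made up):

import java.util.Map;

public class ItemReturnTypeSketch {
  // Java analogue of ITEM: look up a key in a map, returning null when the key is absent,
  // which mirrors SqlTypeTransforms.FORCE_NULLABLE applied to the component type.
  static <V> V item(Map<String, V> map, String key) {
    return map.get(key);
  }

  public static void main(String[] args) {
    Map<String, Integer> myMap = Map.of("foo", 1);
    System.out.println(item(myMap, "foo")); // 1
    System.out.println(item(myMap, "bar")); // null
  }
}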
@@ -206,6 +206,7 @@ public enum ColumnDataType {
TIMESTAMP(LONG, NullValuePlaceHolder.LONG),
STRING(NullValuePlaceHolder.STRING),
JSON(STRING, NullValuePlaceHolder.STRING),
MAP(null),
BYTES(NullValuePlaceHolder.INTERNAL_BYTES),
OBJECT(null),
INT_ARRAY(NullValuePlaceHolder.INT_ARRAY),
@@ -494,6 +495,7 @@ public Serializable convertAndFormat(Object value) {
return new Timestamp((long) value).toString();
case STRING:
case JSON:
case MAP:
return value.toString();
case BYTES:
return ((ByteArray) value).toHexString();
@@ -676,6 +678,8 @@ public static ColumnDataType fromDataTypeSV(DataType dataType) {
return STRING;
case JSON:
return JSON;
case MAP:
return MAP;
case BYTES:
return BYTES;
case UNKNOWN:
@@ -23,6 +23,7 @@
import java.sql.Timestamp;
import java.util.Base64;
import java.util.Collection;
import java.util.Map;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
import org.apache.pinot.spi.data.FieldSpec;
@@ -816,6 +817,24 @@ public Object convert(Object value, PinotDataType sourceType) {
}
},

MAP {
@Override
public Object convert(Object value, PinotDataType sourceType) {
switch (sourceType) {
case OBJECT:
if (value instanceof Map) {
return value;
} else {
throw new UnsupportedOperationException(String.format("Cannot convert '%s' (Class of value: '%s') to MAP",
sourceType, value.getClass()));
}
default:
throw new UnsupportedOperationException(String.format("Cannot convert '%s' (Class of value: '%s') to MAP",
sourceType, value.getClass()));
}
}
},

BYTE_ARRAY {
@Override
public byte[] toBytes(Object value) {
@@ -1444,6 +1463,11 @@ public static PinotDataType getPinotDataTypeForIngestion(FieldSpec fieldSpec) {
return fieldSpec.isSingleValueField() ? STRING : STRING_ARRAY;
case BYTES:
return fieldSpec.isSingleValueField() ? BYTES : BYTES_ARRAY;
case MAP:
if (fieldSpec.isSingleValueField()) {
return MAP;
}
throw new IllegalStateException("There is no multi-value type for MAP");
default:
throw new UnsupportedOperationException(
"Unsupported data type: " + dataType + " in field: " + fieldSpec.getName());
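A small sketch of how the new MAP entry in PinotDataType behaves at ingestion-time conversion, based on the code above: only OBJECT-typed values that are actually java.util.Map instances pass through, and multi-value MAP fields are rejected in getPinotDataTypeForIngestion. This assumes the enum lives at org.apache.pinot.common.utils.PinotDataType; the values are illustrative.

import java.util.Map;
import org.apache.pinot.common.utils.PinotDataType;

public class MapConversionSketch {
  public static void main(String[] args) {
    // A Map value arriving as a generic OBJECT passes through unchanged.
    Object raw = Map.of("foo", 1, "bar", 2);
    System.out.println(PinotDataType.MAP.convert(raw, PinotDataType.OBJECT)); // {foo=1, bar=2}

    // Any other source type (or a non-Map OBJECT value) is rejected.
    try {
      PinotDataType.MAP.convert("not a map", PinotDataType.STRING);
    } catch (UnsupportedOperationException e) {
      System.out.println(e.getMessage()); // Cannot convert 'STRING' (Class of value: 'class java.lang.String') to MAP
    }
  }
}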
@@ -44,6 +44,8 @@ public class FunctionDefinitionRegistryTest {
"geotoh3",
// ArrayToMV and ArrayValueConstructor are placeholder functions without implementation
"arraytomv", "arrayvalueconstructor",
// item is used for the map type and does not need to be registered
"item",
// Scalar function
"scalar",
// Functions without scalar function counterpart as of now
@@ -606,6 +606,11 @@ void readStringValues(int[] docIds, int length, String[] valueBuffer) {
valueBuffer[i] = BytesUtils.toHexString(_reader.getBytes(docIds[i], readerContext));
}
break;
case MAP:
for (int i = 0; i < length; i++) {
valueBuffer[i] = _reader.getString(docIds[i], readerContext);
}
break;
default:
throw new IllegalStateException();
}
@@ -65,6 +65,8 @@ private ValueFetcher createFetcher(BlockValSet blockValSet) {
return new BigDecimalValueFetcher(blockValSet.getBigDecimalValuesSV());
case STRING:
return new StringSingleValueFetcher(blockValSet.getStringValuesSV());
case MAP:
return new StringSingleValueFetcher(blockValSet.getStringValuesSV());
case BYTES:
return new BytesValueFetcher(blockValSet.getBytesValuesSV());
case UNKNOWN:
74 changes: 74 additions & 0 deletions pinot-core/src/main/java/org/apache/pinot/core/map/MapUtils.java
@@ -0,0 +1,74 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.pinot.core.map;

import java.util.Map;
import org.apache.pinot.common.request.context.ExpressionContext;
import org.apache.pinot.segment.local.segment.index.map.MapDataSource;
import org.apache.pinot.segment.spi.IndexSegment;
import org.apache.pinot.segment.spi.datasource.DataSource;


public class MapUtils {
private MapUtils() {
}

/**
 * In the current model of integration between map columns and the Pinot query engine, when an item operation is
 * applied to a map column (e.g., `myMap['foo']`) we create a new DataSource that treats that expression as if it
 * were a column. In other words, the query engine treats a key within a map column just as it would a
 * user-defined column. For this to work, we must map item operations to unique column names and then map those
 * unique column names to a DataSource. This function traverses a query expression, finds any map item
 * operations, constructs the unique internal column name, and maps it to the appropriate key DataSource.
 *
 * @param indexSegment the segment whose map columns are used to resolve key DataSources
 * @param dataSourceMap the caller's mapping from column names to the DataSource for that column; this function
 *                      adds an entry for each referenced map key
 * @param expression the expression to analyze for map item operations
 */
public static void addMapItemOperationsToDataSourceMap(IndexSegment indexSegment,
Map<String, DataSource> dataSourceMap, ExpressionContext expression) {
if (expression.getType() == ExpressionContext.Type.FUNCTION) {
if (expression.getFunction().getFunctionName().equals("item")) {
String columnOp = expression.getFunction().getArguments().get(0).toString();
String key = expression.getFunction().getArguments().get(1).getLiteral().getStringValue();

dataSourceMap.put(constructKeyDataSourceIdentifier(columnOp, key),
((MapDataSource) indexSegment.getDataSource(columnOp)).getKeyDataSource(key));
} else {
// Iterate over the operands and check if any of them are Map Item operations
expression.getFunction().getArguments().forEach(
arg -> addMapItemOperationsToDataSourceMap(indexSegment, dataSourceMap, arg));
}
}
}

/**
 * Constructs the internal identifier for DataSources that represent the values of a specific key within a map
 * column.
 *
 * @param column the name of the map column
 * @param key the key within the map column
 * @return the internal identifier under which the key's DataSource is registered
 */
public static String constructKeyDataSourceIdentifier(String column, String key) {
return String.format("map_col__%s.%s", column, key);
}
}
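A hedged usage sketch of the helpers above: for an expression such as myMap['foo'] (parsed as item(myMap, 'foo')), the planner registers the key's DataSource under a synthetic column name. Only the identifier format is taken directly from the code; the segment wiring is summarized in comments, and the column and key names are illustrative.

import org.apache.pinot.core.map.MapUtils;

public class MapUtilsSketch {
  public static void main(String[] args) {
    // Synthetic column name under which the values of key 'foo' in map column 'myMap' are exposed.
    String keyColumn = MapUtils.constructKeyDataSourceIdentifier("myMap", "foo");
    System.out.println(keyColumn); // map_col__myMap.foo

    // During planning, addMapItemOperationsToDataSourceMap(indexSegment, dataSourceMap, expression)
    // walks the expression tree and, for each item(myMap, 'foo') call, effectively does
    //   dataSourceMap.put("map_col__myMap.foo",
    //       ((MapDataSource) indexSegment.getDataSource("myMap")).getKeyDataSource("foo"));
    // so downstream operators can treat the key like any other single-value column.
  }
}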
@@ -29,6 +29,7 @@
import org.apache.pinot.common.utils.HashUtil;
import org.apache.pinot.core.common.BlockDocIdSet;
import org.apache.pinot.core.common.Operator;
import org.apache.pinot.core.map.MapUtils;
import org.apache.pinot.core.operator.ColumnContext;
import org.apache.pinot.core.operator.dociditerators.ExpressionScanDocIdIterator;
import org.apache.pinot.core.operator.docidsets.ExpressionDocIdSet;
@@ -66,6 +67,7 @@ public ExpressionFilterOperator(IndexSegment segment, QueryContext queryContext,
_dataSourceMap.put(column, dataSource);
columnContextMap.put(column, ColumnContext.fromDataSource(dataSource));
});
MapUtils.addMapItemOperationsToDataSourceMap(segment, _dataSourceMap, lhs);
_transformFunction = TransformFunctionFactory.get(lhs, columnContextMap, _queryContext);
_predicateType = predicate.getType();
if (_predicateType == Predicate.Type.IS_NULL || _predicateType == Predicate.Type.IS_NOT_NULL) {
139 changes: 139 additions & 0 deletions pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/MapItemTransformFunction.java
@@ -0,0 +1,139 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.core.operator.transform.function;

import com.google.common.base.Preconditions;
import java.util.List;
import java.util.Map;
import org.apache.pinot.core.map.MapUtils;
import org.apache.pinot.core.operator.ColumnContext;
import org.apache.pinot.core.operator.blocks.ValueBlock;
import org.apache.pinot.core.operator.transform.TransformResultMetadata;
import org.apache.pinot.segment.local.segment.index.map.MapDataSource;
import org.apache.pinot.segment.spi.datasource.DataSource;
import org.apache.pinot.segment.spi.index.reader.Dictionary;
import org.apache.pinot.spi.data.FieldSpec;


/**
* Evaluates myMap['foo']
*/
public class MapItemTransformFunction {
public static class MapItemFunction extends BaseTransformFunction {
public static final String FUNCTION_NAME = "map_item";
String _column;
String _key;
String _keyDataSourceId;
TransformFunction _mapValue;
TransformFunction _keyValue;
Dictionary _keyDictionary;
private TransformResultMetadata _resultMetadata;

public MapItemFunction() {
_column = null;
_key = null;
_keyDataSourceId = null;
}

@Override
public void init(List<TransformFunction> arguments, Map<String, ColumnContext> columnContextMap) {
super.init(arguments, columnContextMap);
      // Should be exactly 2 arguments (the map value expression and the key expression)
      if (arguments.size() != 2) {
        throw new IllegalArgumentException("Exactly 2 arguments are required for the map item transform function");
      }

      // Check if the second operand (the key) is a string literal; if it is, we can directly construct the
      // MapDataSource, which will pre-compute the key ID.

_mapValue = arguments.get(0);
      Preconditions.checkArgument(_mapValue instanceof IdentifierTransformFunction,
          "Map Item: Left operand must be an identifier");
_column = ((IdentifierTransformFunction) _mapValue).getColumnName();
if (_column == null) {
throw new IllegalArgumentException("Map Item: left operand resolved to a null column name");
}

_keyValue = arguments.get(1);
      Preconditions.checkArgument(_keyValue instanceof LiteralTransformFunction,
          "Map Item: Right operand must be a literal");
      _key = ((LiteralTransformFunction) arguments.get(1)).getStringLiteral();
      Preconditions.checkArgument(_key != null, "Map Item: Right operand must be a string literal");

_keyDataSourceId = MapUtils.constructKeyDataSourceIdentifier(_column, _key);

      // The metadata about the values that this operation will resolve to is determined by the type of the data
      // under the key, not by the Map column. So we need to look up the key's metadata.
MapDataSource mapDS = (MapDataSource) columnContextMap.get(_column).getDataSource();
if (mapDS == null) {
// This should _always_ be a Map Data Source.
throw new RuntimeException("The left operand for a MAP ITEM operation must resolve to a Map Data Source");
}

DataSource keyDS = mapDS.getKeyDataSource(_key);
FieldSpec.DataType keyType = keyDS.getDataSourceMetadata().getDataType().getStoredType();
_keyDictionary = keyDS.getDictionary();
_resultMetadata =
new TransformResultMetadata(keyType, keyDS.getDataSourceMetadata().isSingleValue(),
_keyDictionary != null);
}

@Override
public String getName() {
return FUNCTION_NAME;
}

@Override
public TransformResultMetadata getResultMetadata() {
return new TransformResultMetadata(_resultMetadata.getDataType().getStoredType(), true,
_resultMetadata.hasDictionary());
}

@Override
public Dictionary getDictionary() {
return _keyDictionary;
}

@Override
public int[] transformToDictIdsSV(ValueBlock valueBlock) {
return transformToIntValuesSV(valueBlock);
}

@Override
public int[] transformToIntValuesSV(ValueBlock valueBlock) {
return valueBlock.getBlockValueSet(_keyDataSourceId).getIntValuesSV();
}

@Override
public long[] transformToLongValuesSV(ValueBlock valueBlock) {
return valueBlock.getBlockValueSet(_keyDataSourceId).getLongValuesSV();
}

@Override
public double[] transformToDoubleValuesSV(ValueBlock valueBlock) {
return valueBlock.getBlockValueSet(_keyDataSourceId).getDoubleValuesSV();
}

@Override
public String[] transformToStringValuesSV(ValueBlock valueBlock) {
return valueBlock.getBlockValueSet(_keyDataSourceId).getStringValuesSV();
}
}
}
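A brief walkthrough of the evaluation path implemented above, written as a runnable comment sketch rather than a test, since constructing segments and value blocks is out of scope here; the column and key names are illustrative.

public class MapItemEvaluationSketch {
  public static void main(String[] args) {
    // For a projection or filter expression such as myMap['foo']:
    //  1. init() requires exactly two arguments: an identifier (the map column) and a string
    //     literal (the key), and derives the synthetic column name "map_col__myMap.foo".
    //  2. Result metadata (stored type, single-value flag, dictionary presence) is taken from
    //     the key's DataSource obtained via MapDataSource.getKeyDataSource("foo"), not from
    //     the map column itself.
    //  3. Every transformTo*ValuesSV(valueBlock) call reads
    //     valueBlock.getBlockValueSet("map_col__myMap.foo"), so at evaluation time the key is
    //     served exactly like a regular column that MapUtils registered during planning.
    System.out.println("map_item evaluation path sketch");
  }
}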
@@ -241,6 +241,9 @@ private static Map<String, Class<? extends TransformFunction>> createRegistry()
typeToImplementation.put(TransformFunctionType.VECTOR_DIMS, VectorDimsTransformFunction.class);
typeToImplementation.put(TransformFunctionType.VECTOR_NORM, VectorNormTransformFunction.class);

// Map functions
typeToImplementation.put(TransformFunctionType.ITEM, MapItemTransformFunction.MapItemFunction.class);

Map<String, Class<? extends TransformFunction>> registry
= new HashMap<>(HashUtil.getHashMapCapacity(typeToImplementation.size()));
for (Map.Entry<TransformFunctionType, Class<? extends TransformFunction>> entry : typeToImplementation.entrySet()) {
@@ -26,6 +26,7 @@
import javax.annotation.Nullable;
import org.apache.pinot.common.request.context.ExpressionContext;
import org.apache.pinot.common.utils.HashUtil;
import org.apache.pinot.core.map.MapUtils;
import org.apache.pinot.core.operator.BaseProjectOperator;
import org.apache.pinot.core.operator.DocIdSetOperator;
import org.apache.pinot.core.operator.ProjectionOperator;
@@ -67,15 +68,28 @@ public ProjectPlanNode(SegmentContext segmentContext, QueryContext queryContext,
@Override
public BaseProjectOperator<?> run() {
Set<String> projectionColumns = new HashSet<>();

boolean hasNonIdentifierExpression = false;
for (ExpressionContext expression : _expressions) {
expression.getColumns(projectionColumns);

if (expression.getType() != ExpressionContext.Type.IDENTIFIER) {
hasNonIdentifierExpression = true;
}
}
Map<String, DataSource> dataSourceMap = new HashMap<>(HashUtil.getHashMapCapacity(projectionColumns.size()));
projectionColumns.forEach(column -> dataSourceMap.put(column, _indexSegment.getDataSource(column)));

// TODO(ERICH): if the expression type is an item op with map col then create a MapDataSource and pass the key
for (ExpressionContext expression : _expressions) {
MapUtils.addMapItemOperationsToDataSourceMap(_indexSegment, dataSourceMap, expression);
}

if (_queryContext.getFilter() != null && _queryContext.getFilter().getPredicate() != null) {
MapUtils.addMapItemOperationsToDataSourceMap(_indexSegment, dataSourceMap,
_queryContext.getFilter().getPredicate().getLhs());
}

// NOTE: Skip creating DocIdSetOperator when maxDocsPerCall is 0 (for selection query with LIMIT 0)
DocIdSetOperator docIdSetOperator =
_maxDocsPerCall > 0 ? new DocIdSetPlanNode(_segmentContext, _queryContext, _maxDocsPerCall,
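A simplified model (plain Java, not Pinot API) of what the planning changes above produce for a query like SELECT myMap['foo'] FROM myTable WHERE myMap['bar'] = 'baz': the dataSourceMap ends up with one entry per referenced map key, gathered from both the projection expressions and the filter predicate's left-hand side. The names and string placeholders below are illustrative.

import java.util.HashMap;
import java.util.Map;

public class ProjectPlanSketch {
  public static void main(String[] args) {
    Map<String, String> dataSourceMap = new HashMap<>();
    // Regular column collection: the map column itself is a projection column.
    dataSourceMap.put("myMap", "MapDataSource(myMap)");
    // MapUtils.addMapItemOperationsToDataSourceMap over the projection expressions ...
    dataSourceMap.put("map_col__myMap.foo", "KeyDataSource(myMap, foo)");
    // ... and over the filter predicate's left-hand side.
    dataSourceMap.put("map_col__myMap.bar", "KeyDataSource(myMap, bar)");
    dataSourceMap.forEach((column, source) -> System.out.println(column + " -> " + source));
  }
}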