Support map type
ege-st authored and xiangfu0 committed Jun 21, 2024
1 parent 74e1a14 commit 591e9d9
Showing 59 changed files with 6,056 additions and 32 deletions.
@@ -285,6 +285,12 @@ public enum TransformFunctionType {

ARRAY_VALUE_CONSTRUCTOR("arrayValueConstructor", "array_value_constructor"),

// MAP Functions
ITEM("item",
ReturnTypes.cascade(opBinding -> opBinding.getOperandType(0).getComponentType(),
SqlTypeTransforms.FORCE_NULLABLE),
OperandTypes.family(ImmutableList.of(SqlTypeFamily.MAP, SqlTypeFamily.STRING))),

// Trigonometry
SIN("sin"),
COS("cos"),
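The ITEM entry above declares, via Calcite, that item(map, key) accepts a (MAP, STRING) operand pair and returns the map's value (component) type, forced nullable. A minimal Java analogue of those semantics (illustrative only, not Calcite or Pinot API; all names below are made up):

import java.util.Map;

public class ItemReturnTypeSketch {
  // Java analogue of ITEM: look up a key in a map, returning null when the key is absent,
  // which mirrors SqlTypeTransforms.FORCE_NULLABLE applied to the component type.
  static <V> V item(Map<String, V> map, String key) {
    return map.get(key);
  }

  public static void main(String[] args) {
    Map<String, Integer> myMap = Map.of("foo", 1);
    System.out.println(item(myMap, "foo")); // 1
    System.out.println(item(myMap, "bar")); // null
  }
}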
@@ -206,6 +206,7 @@ public enum ColumnDataType {
TIMESTAMP(LONG, NullValuePlaceHolder.LONG),
STRING(NullValuePlaceHolder.STRING),
JSON(STRING, NullValuePlaceHolder.STRING),
MAP(null),
BYTES(NullValuePlaceHolder.INTERNAL_BYTES),
OBJECT(null),
INT_ARRAY(NullValuePlaceHolder.INT_ARRAY),
@@ -494,6 +495,7 @@ public Serializable convertAndFormat(Object value) {
return new Timestamp((long) value).toString();
case STRING:
case JSON:
case MAP:
return value.toString();
case BYTES:
return ((ByteArray) value).toHexString();
@@ -676,6 +678,8 @@ public static ColumnDataType fromDataTypeSV(DataType dataType) {
return STRING;
case JSON:
return JSON;
case MAP:
return MAP;
case BYTES:
return BYTES;
case UNKNOWN:
@@ -23,6 +23,7 @@
import java.sql.Timestamp;
import java.util.Base64;
import java.util.Collection;
import java.util.Map;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
import org.apache.pinot.spi.data.FieldSpec;
@@ -816,6 +817,24 @@ public Object convert(Object value, PinotDataType sourceType) {
}
},

MAP {
@Override
public Object convert(Object value, PinotDataType sourceType) {
switch (sourceType) {
case OBJECT:
if (value instanceof Map) {
return value;
} else {
throw new UnsupportedOperationException(String.format("Cannot convert '%s' (Class of value: '%s') to MAP",
sourceType, value.getClass()));
}
default:
throw new UnsupportedOperationException(String.format("Cannot convert '%s' (Class of value: '%s') to MAP",
sourceType, value.getClass()));
}
}
},

BYTE_ARRAY {
@Override
public byte[] toBytes(Object value) {
@@ -1444,6 +1463,11 @@ public static PinotDataType getPinotDataTypeForIngestion(FieldSpec fieldSpec) {
return fieldSpec.isSingleValueField() ? STRING : STRING_ARRAY;
case BYTES:
return fieldSpec.isSingleValueField() ? BYTES : BYTES_ARRAY;
case MAP:
if (fieldSpec.isSingleValueField()) {
return MAP;
}
throw new IllegalStateException("There is no multi-value type for MAP");
default:
throw new UnsupportedOperationException(
"Unsupported data type: " + dataType + " in field: " + fieldSpec.getName());
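A small sketch of how the new MAP entry in PinotDataType behaves at ingestion-time conversion, based on the code above: only OBJECT-typed values that are actually java.util.Map instances pass through, and multi-value MAP fields are rejected in getPinotDataTypeForIngestion. This assumes the enum lives at org.apache.pinot.common.utils.PinotDataType; the values are illustrative.

import java.util.Map;
import org.apache.pinot.common.utils.PinotDataType;

public class MapConversionSketch {
  public static void main(String[] args) {
    // A Map value arriving as a generic OBJECT passes through unchanged.
    Object raw = Map.of("foo", 1, "bar", 2);
    System.out.println(PinotDataType.MAP.convert(raw, PinotDataType.OBJECT)); // {foo=1, bar=2}

    // Any other source type (or a non-Map OBJECT value) is rejected.
    try {
      PinotDataType.MAP.convert("not a map", PinotDataType.STRING);
    } catch (UnsupportedOperationException e) {
      System.out.println(e.getMessage()); // Cannot convert 'STRING' (Class of value: 'class java.lang.String') to MAP
    }
  }
}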
@@ -44,6 +44,8 @@ public class FunctionDefinitionRegistryTest {
"geotoh3",
// ArrayToMV and ArrayValueConstructor are placeholder functions without implementation
"arraytomv", "arrayvalueconstructor",
// item is used for the map type and does not need to be registered
"item",
// Scalar function
"scalar",
// Functions without scalar function counterpart as of now
@@ -606,6 +606,11 @@ void readStringValues(int[] docIds, int length, String[] valueBuffer) {
valueBuffer[i] = BytesUtils.toHexString(_reader.getBytes(docIds[i], readerContext));
}
break;
case MAP:
for (int i = 0; i < length; i++) {
valueBuffer[i] = _reader.getString(docIds[i], readerContext);
}
break;
default:
throw new IllegalStateException();
}
@@ -65,6 +65,8 @@ private ValueFetcher createFetcher(BlockValSet blockValSet) {
return new BigDecimalValueFetcher(blockValSet.getBigDecimalValuesSV());
case STRING:
return new StringSingleValueFetcher(blockValSet.getStringValuesSV());
case MAP:
return new StringSingleValueFetcher(blockValSet.getStringValuesSV());
case BYTES:
return new BytesValueFetcher(blockValSet.getBytesValuesSV());
case UNKNOWN:
74 changes: 74 additions & 0 deletions pinot-core/src/main/java/org/apache/pinot/core/map/MapUtils.java
@@ -0,0 +1,74 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.pinot.core.map;

import java.util.Map;
import org.apache.pinot.common.request.context.ExpressionContext;
import org.apache.pinot.segment.local.segment.index.map.MapDataSource;
import org.apache.pinot.segment.spi.IndexSegment;
import org.apache.pinot.segment.spi.datasource.DataSource;


public class MapUtils {
private MapUtils() {
}

/**
 * In the current model of integration between map columns and the Pinot query engine, when an item operation is
 * applied to a map column (e.g., `myMap['foo']`) we create a new DataSource that treats that expression as if it
 * were a column. In other words, the query engine treats a key within a map column just as it would a
 * user-defined column. For this to work, we must map item operations to unique column names and then map those
 * unique column names to a DataSource. This function traverses a query expression, finds any map item
 * operations, constructs the unique internal column name, and maps it to the appropriate key DataSource.
 *
 * @param indexSegment the segment whose map columns are used to resolve key DataSources
 * @param dataSourceMap the caller's mapping from column names to the DataSource for that column; this function
 *                      adds an entry for each referenced map key
 * @param expression the expression to analyze for map item operations
 */
public static void addMapItemOperationsToDataSourceMap(IndexSegment indexSegment,
Map<String, DataSource> dataSourceMap, ExpressionContext expression) {
if (expression.getType() == ExpressionContext.Type.FUNCTION) {
if (expression.getFunction().getFunctionName().equals("item")) {
String columnOp = expression.getFunction().getArguments().get(0).toString();
String key = expression.getFunction().getArguments().get(1).getLiteral().getStringValue();

dataSourceMap.put(constructKeyDataSourceIdentifier(columnOp, key),
((MapDataSource) indexSegment.getDataSource(columnOp)).getKeyDataSource(key));
} else {
// Iterate over the operands and check if any of them are Map Item operations
expression.getFunction().getArguments().forEach(
arg -> addMapItemOperationsToDataSourceMap(indexSegment, dataSourceMap, arg));
}
}
}

/**
 * Constructs the internal identifier for DataSources that represent the values of a specific key within a map
 * column.
 *
 * @param column the name of the map column
 * @param key the key within the map column
 * @return the internal identifier under which the key's DataSource is registered
 */
public static String constructKeyDataSourceIdentifier(String column, String key) {
return String.format("map_col__%s.%s", column, key);
}
}
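A hedged usage sketch of the helpers above: for an expression such as myMap['foo'] (parsed as item(myMap, 'foo')), the planner registers the key's DataSource under a synthetic column name. Only the identifier format is taken directly from the code; the segment wiring is summarized in comments, and the column and key names are illustrative.

import org.apache.pinot.core.map.MapUtils;

public class MapUtilsSketch {
  public static void main(String[] args) {
    // Synthetic column name under which the values of key 'foo' in map column 'myMap' are exposed.
    String keyColumn = MapUtils.constructKeyDataSourceIdentifier("myMap", "foo");
    System.out.println(keyColumn); // map_col__myMap.foo

    // During planning, addMapItemOperationsToDataSourceMap(indexSegment, dataSourceMap, expression)
    // walks the expression tree and, for each item(myMap, 'foo') call, effectively does
    //   dataSourceMap.put("map_col__myMap.foo",
    //       ((MapDataSource) indexSegment.getDataSource("myMap")).getKeyDataSource("foo"));
    // so downstream operators can treat the key like any other single-value column.
  }
}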
@@ -29,6 +29,7 @@
import org.apache.pinot.common.utils.HashUtil;
import org.apache.pinot.core.common.BlockDocIdSet;
import org.apache.pinot.core.common.Operator;
import org.apache.pinot.core.map.MapUtils;
import org.apache.pinot.core.operator.ColumnContext;
import org.apache.pinot.core.operator.dociditerators.ExpressionScanDocIdIterator;
import org.apache.pinot.core.operator.docidsets.ExpressionDocIdSet;
@@ -66,6 +67,7 @@ public ExpressionFilterOperator(IndexSegment segment, QueryContext queryContext,
_dataSourceMap.put(column, dataSource);
columnContextMap.put(column, ColumnContext.fromDataSource(dataSource));
});
MapUtils.addMapItemOperationsToDataSourceMap(segment, _dataSourceMap, lhs);
_transformFunction = TransformFunctionFactory.get(lhs, columnContextMap, _queryContext);
_predicateType = predicate.getType();
if (_predicateType == Predicate.Type.IS_NULL || _predicateType == Predicate.Type.IS_NOT_NULL) {
139 changes: 139 additions & 0 deletions pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/MapItemTransformFunction.java
@@ -0,0 +1,139 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.core.operator.transform.function;

import com.google.common.base.Preconditions;
import java.util.List;
import java.util.Map;
import org.apache.pinot.core.map.MapUtils;
import org.apache.pinot.core.operator.ColumnContext;
import org.apache.pinot.core.operator.blocks.ValueBlock;
import org.apache.pinot.core.operator.transform.TransformResultMetadata;
import org.apache.pinot.segment.local.segment.index.map.MapDataSource;
import org.apache.pinot.segment.spi.datasource.DataSource;
import org.apache.pinot.segment.spi.index.reader.Dictionary;
import org.apache.pinot.spi.data.FieldSpec;


/**
* Evaluates myMap['foo']
*/
public class MapItemTransformFunction {
public static class MapItemFunction extends BaseTransformFunction {
public static final String FUNCTION_NAME = "map_item";
String _column;
String _key;
String _keyDataSourceId;
TransformFunction _mapValue;
TransformFunction _keyValue;
Dictionary _keyDictionary;
private TransformResultMetadata _resultMetadata;

public MapItemFunction() {
_column = null;
_key = null;
_keyDataSourceId = null;
}

@Override
public void init(List<TransformFunction> arguments, Map<String, ColumnContext> columnContextMap) {
super.init(arguments, columnContextMap);
      // Should be exactly 2 arguments (the map value expression and the key expression)
      if (arguments.size() != 2) {
        throw new IllegalArgumentException("Exactly 2 arguments are required for the map item transform function");
      }

      // Check if the second operand (the key) is a string literal; if it is, we can directly construct the
      // MapDataSource, which will pre-compute the key ID.

_mapValue = arguments.get(0);
      Preconditions.checkArgument(_mapValue instanceof IdentifierTransformFunction,
          "Map Item: Left operand must be an identifier");
_column = ((IdentifierTransformFunction) _mapValue).getColumnName();
if (_column == null) {
throw new IllegalArgumentException("Map Item: left operand resolved to a null column name");
}

_keyValue = arguments.get(1);
      Preconditions.checkArgument(_keyValue instanceof LiteralTransformFunction,
          "Map Item: Right operand must be a literal");
      _key = ((LiteralTransformFunction) arguments.get(1)).getStringLiteral();
      Preconditions.checkArgument(_key != null, "Map Item: Right operand must be a string literal");

_keyDataSourceId = MapUtils.constructKeyDataSourceIdentifier(_column, _key);

      // The metadata about the values that this operation will resolve to is determined by the type of the data
      // under the key, not by the Map column. So we need to look up the key's metadata.
MapDataSource mapDS = (MapDataSource) columnContextMap.get(_column).getDataSource();
if (mapDS == null) {
// This should _always_ be a Map Data Source.
throw new RuntimeException("The left operand for a MAP ITEM operation must resolve to a Map Data Source");
}

DataSource keyDS = mapDS.getKeyDataSource(_key);
FieldSpec.DataType keyType = keyDS.getDataSourceMetadata().getDataType().getStoredType();
_keyDictionary = keyDS.getDictionary();
_resultMetadata =
new TransformResultMetadata(keyType, keyDS.getDataSourceMetadata().isSingleValue(),
_keyDictionary != null);
}

@Override
public String getName() {
return FUNCTION_NAME;
}

@Override
public TransformResultMetadata getResultMetadata() {
return new TransformResultMetadata(_resultMetadata.getDataType().getStoredType(), true,
_resultMetadata.hasDictionary());
}

@Override
public Dictionary getDictionary() {
return _keyDictionary;
}

@Override
public int[] transformToDictIdsSV(ValueBlock valueBlock) {
return transformToIntValuesSV(valueBlock);
}

@Override
public int[] transformToIntValuesSV(ValueBlock valueBlock) {
return valueBlock.getBlockValueSet(_keyDataSourceId).getIntValuesSV();
}

@Override
public long[] transformToLongValuesSV(ValueBlock valueBlock) {
return valueBlock.getBlockValueSet(_keyDataSourceId).getLongValuesSV();
}

@Override
public double[] transformToDoubleValuesSV(ValueBlock valueBlock) {
return valueBlock.getBlockValueSet(_keyDataSourceId).getDoubleValuesSV();
}

@Override
public String[] transformToStringValuesSV(ValueBlock valueBlock) {
return valueBlock.getBlockValueSet(_keyDataSourceId).getStringValuesSV();
}
}
}
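A brief walkthrough of the evaluation path implemented above, written as a runnable comment sketch rather than a test, since constructing segments and value blocks is out of scope here; the column and key names are illustrative.

public class MapItemEvaluationSketch {
  public static void main(String[] args) {
    // For a projection or filter expression such as myMap['foo']:
    //  1. init() requires exactly two arguments: an identifier (the map column) and a string
    //     literal (the key), and derives the synthetic column name "map_col__myMap.foo".
    //  2. Result metadata (stored type, single-value flag, dictionary presence) is taken from
    //     the key's DataSource obtained via MapDataSource.getKeyDataSource("foo"), not from
    //     the map column itself.
    //  3. Every transformTo*ValuesSV(valueBlock) call reads
    //     valueBlock.getBlockValueSet("map_col__myMap.foo"), so at evaluation time the key is
    //     served exactly like a regular column that MapUtils registered during planning.
    System.out.println("map_item evaluation path sketch");
  }
}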
@@ -241,6 +241,9 @@ private static Map<String, Class<? extends TransformFunction>> createRegistry()
typeToImplementation.put(TransformFunctionType.VECTOR_DIMS, VectorDimsTransformFunction.class);
typeToImplementation.put(TransformFunctionType.VECTOR_NORM, VectorNormTransformFunction.class);

// Map functions
typeToImplementation.put(TransformFunctionType.ITEM, MapItemTransformFunction.MapItemFunction.class);

Map<String, Class<? extends TransformFunction>> registry
= new HashMap<>(HashUtil.getHashMapCapacity(typeToImplementation.size()));
for (Map.Entry<TransformFunctionType, Class<? extends TransformFunction>> entry : typeToImplementation.entrySet()) {
@@ -26,6 +26,7 @@
import javax.annotation.Nullable;
import org.apache.pinot.common.request.context.ExpressionContext;
import org.apache.pinot.common.utils.HashUtil;
import org.apache.pinot.core.map.MapUtils;
import org.apache.pinot.core.operator.BaseProjectOperator;
import org.apache.pinot.core.operator.DocIdSetOperator;
import org.apache.pinot.core.operator.ProjectionOperator;
@@ -67,15 +68,28 @@ public ProjectPlanNode(SegmentContext segmentContext, QueryContext queryContext,
@Override
public BaseProjectOperator<?> run() {
Set<String> projectionColumns = new HashSet<>();

boolean hasNonIdentifierExpression = false;
for (ExpressionContext expression : _expressions) {
expression.getColumns(projectionColumns);

if (expression.getType() != ExpressionContext.Type.IDENTIFIER) {
hasNonIdentifierExpression = true;
}
}
Map<String, DataSource> dataSourceMap = new HashMap<>(HashUtil.getHashMapCapacity(projectionColumns.size()));
projectionColumns.forEach(column -> dataSourceMap.put(column, _indexSegment.getDataSource(column)));

// TODO(ERICH): if the expression type is an item op with map col then create a MapDataSource and pass the key
for (ExpressionContext expression : _expressions) {
MapUtils.addMapItemOperationsToDataSourceMap(_indexSegment, dataSourceMap, expression);
}

if (_queryContext.getFilter() != null && _queryContext.getFilter().getPredicate() != null) {
MapUtils.addMapItemOperationsToDataSourceMap(_indexSegment, dataSourceMap,
_queryContext.getFilter().getPredicate().getLhs());
}

// NOTE: Skip creating DocIdSetOperator when maxDocsPerCall is 0 (for selection query with LIMIT 0)
DocIdSetOperator docIdSetOperator =
_maxDocsPerCall > 0 ? new DocIdSetPlanNode(_segmentContext, _queryContext, _maxDocsPerCall,
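A simplified model (plain Java, not Pinot API) of what the planning changes above produce for a query like SELECT myMap['foo'] FROM myTable WHERE myMap['bar'] = 'baz': the dataSourceMap ends up with one entry per referenced map key, gathered from both the projection expressions and the filter predicate's left-hand side. The names and string placeholders below are illustrative.

import java.util.HashMap;
import java.util.Map;

public class ProjectPlanSketch {
  public static void main(String[] args) {
    Map<String, String> dataSourceMap = new HashMap<>();
    // Regular column collection: the map column itself is a projection column.
    dataSourceMap.put("myMap", "MapDataSource(myMap)");
    // MapUtils.addMapItemOperationsToDataSourceMap over the projection expressions ...
    dataSourceMap.put("map_col__myMap.foo", "KeyDataSource(myMap, foo)");
    // ... and over the filter predicate's left-hand side.
    dataSourceMap.put("map_col__myMap.bar", "KeyDataSource(myMap, bar)");
    dataSourceMap.forEach((column, source) -> System.out.println(column + " -> " + source));
  }
}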