Skip to content

Commit

Permalink
Support map type
Browse files Browse the repository at this point in the history
  • Loading branch information
ege-st authored and xiangfu0 committed Aug 9, 2024
1 parent c9e5d8a commit 0229ca1
Show file tree
Hide file tree
Showing 71 changed files with 6,280 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.apache.pinot.common.response.ProcessingException;
import org.apache.pinot.common.utils.DataSchema;
import org.apache.pinot.common.utils.HashUtil;
import org.apache.pinot.common.utils.MapUtils;
import org.apache.pinot.common.utils.RoaringBitmapUtils;
import org.apache.pinot.spi.accounting.ThreadResourceUsageProvider;
import org.apache.pinot.spi.utils.BigDecimalUtils;
Expand Down Expand Up @@ -338,6 +339,14 @@ public String[] getStringArray(int rowId, int colId) {
return strings;
}

/**
 * Reads the MAP value stored at the given row and column.
 *
 * <p>A recorded length of zero is treated as a null map: a map serialized by {@code MapUtils.serializeMap} always
 * carries at least its 4-byte entry count, so a zero-length cell cannot be decoded and would otherwise fail with a
 * {@code BufferUnderflowException} on the first read.
 *
 * @param rowId row index of the cell
 * @param colId column index of the cell
 * @return the deserialized map, or {@code null} when no bytes were recorded for the cell
 */
@Nullable
@Override
public Map<String, Object> getMap(int rowId, int colId) {
  int size = positionOffsetInVariableBufferAndGetLength(rowId, colId);
  if (size == 0) {
    return null;
  }
  // Decode from a view capped at this value's length so the decoder cannot read past it.
  // NOTE(review): presumably the call above leaves _variableSizeData positioned at the value's first byte - confirm.
  ByteBuffer buffer = _variableSizeData.slice();
  buffer.limit(size);
  return MapUtils.deserializeMap(buffer);
}

@Nullable
@Override
public CustomObject getCustomObject(int rowId, int colId) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ byte[] toBytes()

String[] getStringArray(int rowId, int colId);

/**
 * Returns the MAP value stored at the given row and column.
 *
 * <p>NOTE(review): this declaration is not annotated {@code @Nullable}; confirm whether implementations may return
 * {@code null} for a cell that holds a null map.
 *
 * @param rowId row index of the cell
 * @param colId column index of the cell
 */
Map<String, Object> getMap(int rowId, int colId);

CustomObject getCustomObject(int rowId, int colId);

@Nullable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ byte[] toBytes()

String[] getStringArray(int rowId, int colId);

/**
 * Returns the MAP value stored at the given row and column, or {@code null} when the cell holds no map.
 *
 * @param rowId row index of the cell
 * @param colId column index of the cell
 */
@Nullable
Map<String, Object> getMap(int rowId, int colId);

@Nullable
CustomObject getCustomObject(int rowId, int colId);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.pinot.common.response.ProcessingException;
import org.apache.pinot.common.utils.DataSchema;
import org.apache.pinot.common.utils.HashUtil;
import org.apache.pinot.common.utils.MapUtils;
import org.apache.pinot.common.utils.RoaringBitmapUtils;
import org.apache.pinot.spi.accounting.ThreadResourceUsageProvider;
import org.apache.pinot.spi.trace.Tracing;
Expand Down Expand Up @@ -317,6 +318,18 @@ public String[] getStringArray(int rowId, int colId) {
return strings;
}

/**
 * Reads the MAP value stored at the given row and column.
 *
 * @param rowId row index of the cell
 * @param colId column index of the cell
 * @return the deserialized map, or {@code null} when the recorded length of the cell is zero
 */
@Nullable
@Override
public Map<String, Object> getMap(int rowId, int colId) {
  int length = positionOffsetInVariableBufferAndGetLength(rowId, colId);
  if (length != 0) {
    // Hand the decoder a view that ends exactly where this value ends.
    ByteBuffer valueView = _variableSizeData.slice();
    valueView.limit(length);
    return MapUtils.deserializeMap(valueView);
  }
  return null;
}

@Nullable
@Override
public CustomObject getCustomObject(int rowId, int colId) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,12 @@ public enum TransformFunctionType {
VECTOR_DIMS("vectorDims", ReturnTypes.INTEGER, OperandTypes.ARRAY),
VECTOR_NORM("vectorNorm", ReturnTypes.DOUBLE, OperandTypes.ARRAY),

// MAP Functions
// ITEM accepts a MAP-family operand followed by a STRING-family operand. Its return type is inferred as the component
// type of the first operand and then forced to nullable.
// NOTE(review): Calcite documents getValueType() as the accessor for a map's value type; confirm that
// getComponentType() is non-null for the MAP operands reaching this function.
ITEM("item",
ReturnTypes.cascade(opBinding -> opBinding.getOperandType(0).getComponentType(),
SqlTypeTransforms.FORCE_NULLABLE),
OperandTypes.family(List.of(SqlTypeFamily.MAP, SqlTypeFamily.STRING))),

// Trigonometry
SIN("sin"),
COS("cos"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ public enum ColumnDataType {
TIMESTAMP(LONG, NullValuePlaceHolder.LONG),
STRING(NullValuePlaceHolder.STRING),
JSON(STRING, NullValuePlaceHolder.STRING),
MAP(null),
BYTES(NullValuePlaceHolder.INTERNAL_BYTES),
OBJECT(null),
INT_ARRAY(NullValuePlaceHolder.INT_ARRAY),
Expand Down Expand Up @@ -494,6 +495,7 @@ public Serializable convertAndFormat(Object value) {
return new Timestamp((long) value).toString();
case STRING:
case JSON:
case MAP:
return value.toString();
case BYTES:
return ((ByteArray) value).toHexString();
Expand Down Expand Up @@ -676,6 +678,8 @@ public static ColumnDataType fromDataTypeSV(DataType dataType) {
return STRING;
case JSON:
return JSON;
case MAP:
return MAP;
case BYTES:
return BYTES;
case UNKNOWN:
Expand Down
105 changes: 105 additions & 0 deletions pinot-common/src/main/java/org/apache/pinot/common/utils/MapUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.common.utils;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static java.nio.charset.StandardCharsets.UTF_8;


/**
 * Static helpers that convert a {@code Map<String, Object>} to and from a length-prefixed binary form.
 *
 * <p>Layout: entry count (int), then for every entry the key length (int), the UTF-8 key bytes, the value length
 * (int) and the value bytes as written by a Jackson {@link ObjectMapper}.
 */
public class MapUtils {
  private static final Logger LOGGER = LoggerFactory.getLogger(MapUtils.class);
  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

  private MapUtils() {
  }

  /**
   * Serializes the given map into the length-prefixed layout described on the class.
   *
   * @param map the map to serialize; an empty map yields only the 4-byte entry count
   * @return the serialized bytes
   * @throws JsonProcessingException if a value cannot be written by the object mapper
   * @throws IllegalStateException if the serialized form would exceed {@code Integer.MAX_VALUE} bytes
   */
  public static byte[] serializeMap(Map<String, Object> map)
      throws JsonProcessingException {
    int numEntries = map.size();
    if (numEntries == 0) {
      // Four zero bytes, i.e. an entry count of 0.
      return new byte[Integer.BYTES];
    }

    // First pass: encode every key and value while summing the total length. The sum is kept as a long so that an
    // oversized payload is detected below instead of silently overflowing.
    byte[][] encodedKeys = new byte[numEntries][];
    byte[][] encodedValues = new byte[numEntries][];
    long totalLength = Integer.BYTES + 2L * numEntries * Integer.BYTES;
    int numEncoded = 0;
    for (Map.Entry<String, Object> entry : map.entrySet()) {
      byte[] encodedKey = entry.getKey().getBytes(UTF_8);
      totalLength += encodedKey.length;
      encodedKeys[numEncoded] = encodedKey;
      byte[] encodedValue = OBJECT_MAPPER.writeValueAsBytes(entry.getValue());
      totalLength += encodedValue.length;
      encodedValues[numEncoded] = encodedValue;
      numEncoded++;
    }
    Preconditions.checkState(totalLength <= Integer.MAX_VALUE, "Buffer size exceeds 2GB");

    // Second pass: lay out the count followed by the (length, bytes) pairs.
    byte[] serialized = new byte[(int) totalLength];
    ByteBuffer out = ByteBuffer.wrap(serialized);
    out.putInt(numEntries);
    for (int i = 0; i < numEncoded; i++) {
      out.putInt(encodedKeys[i].length).put(encodedKeys[i]);
      out.putInt(encodedValues[i].length).put(encodedValues[i]);
    }
    return serialized;
  }

  /**
   * Reads a map in the layout described on the class, starting at the buffer's current position.
   *
   * <p>A value that the object mapper fails to parse is logged and stored as {@code null} under its key; parsing then
   * continues with the next entry.
   *
   * @param byteBuffer buffer positioned at the entry count
   * @return {@code Map.of()} when the entry count is zero, otherwise a {@code HashMap} holding the decoded entries
   */
  public static Map<String, Object> deserializeMap(ByteBuffer byteBuffer) {
    int numEntries = byteBuffer.getInt();
    if (numEntries == 0) {
      return Map.of();
    }

    Map<String, Object> result = new java.util.HashMap<>(numEntries);
    for (int remaining = numEntries; remaining > 0; remaining--) {
      String key = new String(readLengthPrefixed(byteBuffer), UTF_8);
      byte[] valueBytes = readLengthPrefixed(byteBuffer);
      Object value;
      try {
        value = OBJECT_MAPPER.readValue(valueBytes, Object.class);
      } catch (IOException e) {
        LOGGER.error("Caught exception while deserializing value for key: {}", key, e);
        value = null;
      }
      result.put(key, value);
    }
    return result;
  }

  /** Reads an int length followed by that many bytes from the buffer's current position. */
  private static byte[] readLengthPrefixed(ByteBuffer byteBuffer) {
    int length = byteBuffer.getInt();
    byte[] bytes = new byte[length];
    byteBuffer.get(bytes);
    return bytes;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.sql.Timestamp;
import java.util.Base64;
import java.util.Collection;
import java.util.Map;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
import org.apache.pinot.spi.data.FieldSpec;
Expand Down Expand Up @@ -816,6 +817,24 @@ public Object convert(Object value, PinotDataType sourceType) {
}
},

MAP {
  /**
   * Converts a value to MAP. Only a value whose source type is {@code OBJECT} and which already is a {@link Map} is
   * accepted, and it is returned unchanged; every other input is rejected.
   *
   * @param value the value to convert
   * @param sourceType the type the value currently has
   * @return {@code value} itself when it is an OBJECT-typed {@link Map}
   * @throws UnsupportedOperationException if the source type is not OBJECT or the value is not a Map
   */
  @Override
  public Object convert(Object value, PinotDataType sourceType) {
    if (sourceType == OBJECT && value instanceof Map) {
      return value;
    }
    // Look the class up null-safely: a null value must surface as the conversion failure below, not as a
    // NullPointerException raised while the message is being formatted.
    throw new UnsupportedOperationException(String.format("Cannot convert '%s' (Class of value: '%s') to MAP",
        sourceType, value == null ? null : value.getClass()));
  }
},

BYTE_ARRAY {
@Override
public byte[] toBytes(Object value) {
Expand Down Expand Up @@ -1468,6 +1487,11 @@ public static PinotDataType getPinotDataTypeForIngestion(FieldSpec fieldSpec) {
return fieldSpec.isSingleValueField() ? STRING : STRING_ARRAY;
case BYTES:
return fieldSpec.isSingleValueField() ? BYTES : BYTES_ARRAY;
case MAP:
if (fieldSpec.isSingleValueField()) {
return MAP;
}
throw new IllegalStateException("There is no multi-value type for MAP");
default:
throw new UnsupportedOperationException(
"Unsupported data type: " + dataType + " in field: " + fieldSpec.getName());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,11 @@ void readStringValues(int[] docIds, int length, String[] valueBuffer) {
valueBuffer[i] = BytesUtils.toHexString(_reader.getBytes(docIds[i], readerContext));
}
break;
case MAP:
for (int i = 0; i < length; i++) {
valueBuffer[i] = _reader.getString(docIds[i], readerContext);
}
break;
default:
throw new IllegalStateException();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ private ValueFetcher createFetcher(BlockValSet blockValSet) {
return new BigDecimalValueFetcher(blockValSet.getBigDecimalValuesSV());
case STRING:
return new StringSingleValueFetcher(blockValSet.getStringValuesSV());
case MAP:
return new StringSingleValueFetcher(blockValSet.getStringValuesSV());
case BYTES:
return new BytesValueFetcher(blockValSet.getBytesValuesSV());
case UNKNOWN:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;
import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
import org.apache.pinot.common.CustomObject;
Expand All @@ -34,6 +35,7 @@
import org.apache.pinot.common.datablock.RowDataBlock;
import org.apache.pinot.common.utils.DataSchema;
import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
import org.apache.pinot.common.utils.MapUtils;
import org.apache.pinot.common.utils.RoaringBitmapUtils;
import org.apache.pinot.core.common.ObjectSerDeUtils;
import org.apache.pinot.spi.utils.BigDecimalUtils;
Expand Down Expand Up @@ -171,6 +173,11 @@ public static RowDataBlock buildFromRows(List<Object[]> rows, DataSchema dataSch
setColumn(rowBuilder, byteBuffer, (String[]) value);
break;

// Map column
case MAP:
setColumn(rowBuilder, byteBuffer, (Map<String, Object>) value);
break;

// Special intermediate result for aggregation function
case OBJECT:
setColumn(rowBuilder, byteBuffer, value);
Expand Down Expand Up @@ -343,6 +350,18 @@ public static ColumnarDataBlock buildFromColumns(List<Object[]> columns, DataSch
}
break;

// Map column
case MAP:
for (int rowId = 0; rowId < numRows; rowId++) {
value = column[rowId];
if (value == null) {
nullBitmaps[colId].add(rowId);
value = nullPlaceholders[colId];
}
setColumn(columnarBuilder, byteBuffer, (Map<String, Object>) value);
}
break;

// Special intermediate result for aggregation function
case OBJECT:
for (int rowId = 0; rowId < numRows; rowId++) {
Expand Down Expand Up @@ -413,6 +432,19 @@ private static void setColumn(DataBlockBuilder builder, ByteBuffer byteBuffer, B
builder._variableSizeDataByteArrayOutputStream.write(bytes);
}

/**
 * Writes a MAP cell: the current size of the variable-size byte stream and the value's byte length go into
 * {@code byteBuffer}, and the serialized map is appended to the variable-size byte stream.
 *
 * <p>For a {@code null} value the recorded length is 0 and {@code CustomObject.NULL_TYPE_VALUE} is written to the
 * variable-size data output stream.
 * NOTE(review): that marker is written although the recorded length is 0 - confirm whether any reader depends on it.
 *
 * @throws IOException if serializing the map or writing to the streams fails
 */
private static void setColumn(DataBlockBuilder builder, ByteBuffer byteBuffer, @Nullable Map<String, Object> value)
    throws IOException {
  byteBuffer.putInt(builder._variableSizeDataByteArrayOutputStream.size());
  if (value != null) {
    byte[] serializedMap = MapUtils.serializeMap(value);
    byteBuffer.putInt(serializedMap.length);
    builder._variableSizeDataByteArrayOutputStream.write(serializedMap);
    return;
  }
  byteBuffer.putInt(0);
  builder._variableSizeDataOutputStream.writeInt(CustomObject.NULL_TYPE_VALUE);
}

// TODO: Move ser/de into AggregationFunction interface
private static void setColumn(DataBlockBuilder builder, ByteBuffer byteBuffer, @Nullable Object value)
throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
import java.io.IOException;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.util.Map;
import javax.annotation.Nullable;
import org.apache.pinot.common.CustomObject;
import org.apache.pinot.common.datatable.DataTableUtils;
import org.apache.pinot.common.utils.DataSchema;
import org.apache.pinot.common.utils.MapUtils;
import org.apache.pinot.core.common.ObjectSerDeUtils;
import org.apache.pinot.spi.utils.BigDecimalUtils;

Expand Down Expand Up @@ -96,6 +98,20 @@ public void setColumn(int colId, BigDecimal value)
_variableSizeDataByteArrayOutputStream.write(bytes);
}

/**
 * Writes a MAP cell for the given column of the current row: the current size of the variable-size byte stream and
 * the value's byte length go into the row buffer at the column's offset, and the serialized map is appended to the
 * variable-size byte stream. A {@code null} value records a length of 0 and appends nothing.
 *
 * @param colId index of the column to set
 * @param value the map to store, or {@code null}
 * @throws IOException if serializing the map or writing it out fails
 */
@Override
public void setColumn(int colId, @Nullable Map<String, Object> value)
    throws IOException {
  _currentRowDataByteBuffer.position(_columnOffsets[colId]);
  _currentRowDataByteBuffer.putInt(_variableSizeDataByteArrayOutputStream.size());
  if (value == null) {
    _currentRowDataByteBuffer.putInt(0);
    return;
  }
  byte[] serializedMap = MapUtils.serializeMap(value);
  _currentRowDataByteBuffer.putInt(serializedMap.length);
  _variableSizeDataByteArrayOutputStream.write(serializedMap);
}

@Override
public void setColumn(int colId, @Nullable Object value)
throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.io.IOException;
import java.math.BigDecimal;
import java.util.Map;
import javax.annotation.Nullable;
import org.apache.pinot.common.datatable.DataTable;
import org.apache.pinot.spi.annotations.InterfaceAudience;
Expand Down Expand Up @@ -62,6 +63,9 @@ void setColumn(int colId, BigDecimal value)
void setColumn(int colId, ByteArray value)
throws IOException;

/**
 * Sets a MAP value for the given column.
 *
 * @param colId index of the column to set
 * @param value the map to store; may be {@code null}
 * @throws IOException presumably when the value cannot be serialized or written - confirm against implementations
 */
void setColumn(int colId, @Nullable Map<String, Object> value)
throws IOException;

// TODO: Move ser/de into AggregationFunction interface
void setColumn(int colId, @Nullable Object value)
throws IOException;
Expand Down
Loading

0 comments on commit 0229ca1

Please sign in to comment.