Skip to content

Commit

Permalink
AllowUsingSystemPropertyToConfigCharsetDecoderErrorAction (#42520)
Browse files Browse the repository at this point in the history
* allow using system property to config Charset decoder error action

---------

Co-authored-by: annie-mac <[email protected]>
  • Loading branch information
xinlian12 and annie-mac authored Oct 25, 2024
1 parent f13adb1 commit 2ad2942
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -107,4 +107,13 @@ private static int getJavaVersion() {
return version;
}
}

/**
 * Verifies that {@code Utils.shouldAllowUnquotedControlChars()} returns {@code true} when the
 * system property is not overriding it and {@code false} once the
 * {@code COSMOS.ALLOW_UNQUOTED_CONTROL_CHARS} system property is set to "false".
 */
@Test(groups = { "unit" })
public void allowUnquotedControlChars() {
    final String propertyName = "COSMOS.ALLOW_UNQUOTED_CONTROL_CHARS";
    // Remember whatever was configured before so this test does not clobber it.
    final String previousValue = System.getProperty(propertyName);

    try {
        assertThat(Utils.shouldAllowUnquotedControlChars()).isTrue();

        System.setProperty(propertyName, "false");
        assertThat(Utils.shouldAllowUnquotedControlChars()).isFalse();
    } finally {
        // Always undo the override, even when an assertion fails, so the JVM-wide
        // property cannot leak into tests that run afterwards.
        if (previousValue == null) {
            System.clearProperty(propertyName);
        } else {
            System.setProperty(propertyName, previousValue);
        }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.cosmos.implementation.directconnectivity;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufInputStream;
import io.netty.buffer.Unpooled;
import org.testng.annotations.Ignore;
import org.testng.annotations.Test;

public class JsonNodeStorePayloadTests {
    private static final String MALFORMED_INPUT_PROPERTY =
        "COSMOS.CHARSET_DECODER_ERROR_ACTION_ON_MALFORMED_INPUT";
    private static final String UNMAPPED_CHARACTER_PROPERTY =
        "COSMOS.CHARSET_DECODER_ERROR_ACTION_ON_UNMAPPED_CHARACTER";

    /**
     * Parses a JSON payload containing an invalid UTF-8 byte while both charset decoder
     * error actions are configured as REPLACE, and expects a payload to be produced.
     */
    @Test(groups = {"unit"})
    @Ignore("fallbackCharsetDecoder will only be initialized during the first time when JsonNodeStorePayload loaded," +
        " need to figure out a way to reload the class")
    public void parsingBytesWithInvalidUT8Bytes() {
        // the hex string represents a json with invalid UTF-8 characters
        // json_obj = {
        //   "id": "example_id",
        //   "content": "\xff\n\t\x07"  # Invalid UTF-8 byte, newline, tab, and BEL
        // }
        String invalidHexString = "7b226964223a20226578616d706c655f6964222c2022636f6e74656e74223a2022ff0a0907227d";

        // Capture the current values so the finally block can put them back.
        String previousMalformedInput = System.getProperty(MALFORMED_INPUT_PROPERTY);
        String previousUnmappedCharacter = System.getProperty(UNMAPPED_CHARACTER_PROPERTY);
        System.setProperty(MALFORMED_INPUT_PROPERTY, "REPLACE");
        System.setProperty(UNMAPPED_CHARACTER_PROPERTY, "REPLACE");

        try {
            byte[] bytes = hexStringToByteArray(invalidHexString);
            ByteBuf byteBuf = Unpooled.wrappedBuffer(bytes);
            JsonNodeStorePayload jsonNodeStorePayload =
                new JsonNodeStorePayload(new ByteBufInputStream(byteBuf), bytes.length);

            // Make the expectation explicit instead of relying on an incidental NullPointerException.
            if (jsonNodeStorePayload.getPayload() == null) {
                throw new AssertionError("Expected a JSON payload to be parsed from the invalid UTF-8 input");
            }
            jsonNodeStorePayload.getPayload().toString();
        } finally {
            restoreProperty(MALFORMED_INPUT_PROPERTY, previousMalformedInput);
            restoreProperty(UNMAPPED_CHARACTER_PROPERTY, previousUnmappedCharacter);
        }
    }

    /**
     * Restores a system property to the value it had before the test ran.
     *
     * @param name the system property name
     * @param previousValue the earlier value, or {@code null} when the property was not set
     */
    private static void restoreProperty(String name, String previousValue) {
        if (previousValue == null) {
            System.clearProperty(name);
        } else {
            System.setProperty(name, previousValue);
        }
    }

    /**
     * Converts a string of hexadecimal digits into the bytes it encodes.
     *
     * @param hex an even-length string consisting only of hexadecimal digits
     * @return the decoded bytes, two input characters per byte
     * @throws IllegalArgumentException if the length is odd or a character is not a hex digit
     */
    private static byte[] hexStringToByteArray(String hex) {
        int len = hex.length();
        if ((len & 1) != 0) {
            throw new IllegalArgumentException(
                "Hex string must contain an even number of characters, but length was " + len);
        }

        byte[] data = new byte[len / 2];
        for (int i = 0; i < len; i += 2) {
            int highNibble = Character.digit(hex.charAt(i), 16);
            int lowNibble = Character.digit(hex.charAt(i + 1), 16);
            // Character.digit returns -1 for non-hex characters; fail loudly instead of
            // folding that sentinel into a bogus byte value.
            if (highNibble < 0 || lowNibble < 0) {
                throw new IllegalArgumentException("Invalid hex digit at index " + (highNibble < 0 ? i : i + 1));
            }
            data[i / 2] = (byte) ((highNibble << 4) + lowNibble);
        }

        return data;
    }
}
3 changes: 3 additions & 0 deletions sdk/cosmos/azure-cosmos/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#### Bugs Fixed

#### Other Changes
* Enable `JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS` by default for objectMapper. - See [PR 42520](https://github.com/Azure/azure-sdk-for-java/pull/42520)
* Added system property `COSMOS.ALLOW_UNQUOTED_CONTROL_CHARS`, which allows customers to enable or disable `JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS`. - See [PR 42520](https://github.com/Azure/azure-sdk-for-java/pull/42520)
* Added system properties `COSMOS.CHARSET_DECODER_ERROR_ACTION_ON_MALFORMED_INPUT` and `COSMOS.CHARSET_DECODER_ERROR_ACTION_ON_UNMAPPED_CHARACTER` to allow users to configure the error action taken on invalid UTF-8 bytes. - See [PR 42520](https://github.com/Azure/azure-sdk-for-java/pull/42520)

### 4.63.4 (2024-10-15)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,13 @@ public class Configs {
public static final String PREVENT_INVALID_ID_CHARS_VARIABLE = "COSMOS_PREVENT_INVALID_ID_CHARS";
public static final boolean DEFAULT_PREVENT_INVALID_ID_CHARS = false;

// Name of the setting that selects the CodingErrorAction a charset decoder applies to malformed input.
// The default is the empty string, i.e. no action is configured.
public static final String CHARSET_DECODER_ERROR_ACTION_ON_MALFORMED_INPUT = "COSMOS.CHARSET_DECODER_ERROR_ACTION_ON_MALFORMED_INPUT";
public static final String DEFAULT_CHARSET_DECODER_ERROR_ACTION_ON_MALFORMED_INPUT = StringUtils.EMPTY;

// Name of the setting that selects the CodingErrorAction a charset decoder applies to unmappable characters.
// The default is the empty string, i.e. no action is configured.
public static final String CHARSET_DECODER_ERROR_ACTION_ON_UNMAPPED_CHARACTER = "COSMOS.CHARSET_DECODER_ERROR_ACTION_ON_UNMAPPED_CHARACTER";
public static final String DEFAULT_CHARSET_DECODER_ERROR_ACTION_ON_UNMAPPED_CHARACTER = StringUtils.EMPTY;

// Metrics
// Samples:
Expand Down Expand Up @@ -790,4 +797,20 @@ public static int getConnectionEstablishmentTimeoutForPartitionRecoveryInSeconds

return DEFAULT_CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS;
}

/**
 * Resolves the configured charset decoder error action for malformed input.
 * The system property wins; otherwise a non-empty environment variable of the same
 * name is used; otherwise the default is returned.
 *
 * @return the configured value, or the default when nothing is configured
 */
public static String getCharsetDecoderErrorActionOnMalformedInput() {
    // An empty environment value is treated the same as an absent one.
    String environmentValue =
        emptyToNull(System.getenv().get(CHARSET_DECODER_ERROR_ACTION_ON_MALFORMED_INPUT));
    String fallbackValue =
        firstNonNull(environmentValue, DEFAULT_CHARSET_DECODER_ERROR_ACTION_ON_MALFORMED_INPUT);

    return System.getProperty(CHARSET_DECODER_ERROR_ACTION_ON_MALFORMED_INPUT, fallbackValue);
}

/**
 * Resolves the configured charset decoder error action for unmappable characters.
 * The system property wins; otherwise a non-empty environment variable of the same
 * name is used; otherwise the default is returned.
 *
 * @return the configured value, or the default when nothing is configured
 */
public static String getCharsetDecoderErrorActionOnUnmappedCharacter() {
    // An empty environment value is treated the same as an absent one.
    String environmentValue =
        emptyToNull(System.getenv().get(CHARSET_DECODER_ERROR_ACTION_ON_UNMAPPED_CHARACTER));
    String fallbackValue =
        firstNonNull(environmentValue, DEFAULT_CHARSET_DECODER_ERROR_ACTION_ON_UNMAPPED_CHARACTER);

    return System.getProperty(CHARSET_DECODER_ERROR_ACTION_ON_UNMAPPED_CHARACTER, fallbackValue);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,10 @@
import java.util.function.Consumer;
import java.util.regex.Pattern;

import static com.azure.cosmos.implementation.guava25.base.MoreObjects.firstNonNull;
import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkArgument;
import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull;
import static com.azure.cosmos.implementation.guava25.base.Strings.emptyToNull;

/**
* While this class is public, but it is not part of our published public APIs.
Expand All @@ -61,6 +63,11 @@
public class Utils {
private final static Logger logger = LoggerFactory.getLogger(Utils.class);

// Controls whether JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS is enabled; enabled by default.
// This setting is kept here rather than in Configs to avoid a circular reference between the classes.
private static final boolean DEFAULT_ALLOW_UNQUOTED_CONTROL_CHARS = true;
private static final String ALLOW_UNQUOTED_CONTROL_CHARS = "COSMOS.ALLOW_UNQUOTED_CONTROL_CHARS";

public static final Class<?> byteArrayClass = new byte[0].getClass();

private static final int JAVA_VERSION = getJavaVersion();
Expand Down Expand Up @@ -116,6 +123,10 @@ private static ObjectMapper createAndInitializeObjectMapper(boolean allowDuplica
}
objectMapper.configure(DeserializationFeature.ACCEPT_FLOAT_AS_INT, false);

if (shouldAllowUnquotedControlChars()) {
objectMapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, true);
}

tryToLoadJacksonPerformanceLibrary(objectMapper);

objectMapper.registerModule(new JavaTimeModule());
Expand Down Expand Up @@ -762,4 +773,16 @@ public static long getMaxIntegratedCacheStalenessInMillis(DedicatedGatewayReques
}
return maxIntegratedCacheStaleness.toMillis();
}

/**
 * Indicates whether unquoted control characters should be allowed when parsing JSON.
 * The system property wins; otherwise a non-empty environment variable of the same
 * name is used; otherwise the built-in default applies.
 *
 * @return {@code true} only when the resolved setting parses as boolean true
 */
public static boolean shouldAllowUnquotedControlChars() {
    // An empty environment value is treated the same as an absent one.
    String environmentValue = emptyToNull(System.getenv().get(ALLOW_UNQUOTED_CONTROL_CHARS));
    String fallbackValue = firstNonNull(environmentValue, String.valueOf(DEFAULT_ALLOW_UNQUOTED_CONTROL_CHARS));
    String resolvedValue = System.getProperty(ALLOW_UNQUOTED_CONTROL_CHARS, fallbackValue);

    return Boolean.parseBoolean(resolvedValue);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,62 @@

package com.azure.cosmos.implementation.directconnectivity;

import com.azure.cosmos.implementation.Configs;
import com.azure.cosmos.implementation.Utils;
import com.fasterxml.jackson.databind.JsonNode;
import io.netty.buffer.ByteBufInputStream;
import io.netty.util.internal.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;

public class JsonNodeStorePayload implements StorePayload<JsonNode> {
private static final Logger logger = LoggerFactory.getLogger(JsonNodeStorePayload.class);
// Built once, when this class is initialized, from the charset decoder error-action settings.
// It is null when no fallback decoder is configured.
private static final CharsetDecoder fallbackCharsetDecoder = getFallbackCharsetDecoder();
// The readableBytes value passed to the constructor, or 0 when there was nothing to read.
private final int responsePayloadSize;
// Parsed JSON tree of the payload, or null when there was nothing to read.
private final JsonNode jsonValue;

/**
 * Creates a payload by parsing JSON from the given stream.
 *
 * @param bufferStream the stream to read the JSON bytes from
 * @param readableBytes the number of bytes to read; when not positive, the payload is
 *                      empty (size 0, {@code null} JSON value) and the stream is not read
 */
public JsonNodeStorePayload(ByteBufInputStream bufferStream, int readableBytes) {
    if (readableBytes > 0) {
        this.responsePayloadSize = readableBytes;
        // Assign the final field exactly once, using the two-argument fromJson overload.
        this.jsonValue = fromJson(bufferStream, readableBytes);
    } else {
        this.responsePayloadSize = 0;
        this.jsonValue = null;
    }
}

private static JsonNode fromJson(ByteBufInputStream bufferStream){
private static JsonNode fromJson(ByteBufInputStream bufferStream, int readableBytes) {
byte[] bytes = new byte[readableBytes];
try {
return Utils.getSimpleObjectMapper().readTree(bufferStream);
bufferStream.read(bytes);
return Utils.getSimpleObjectMapper().readTree(bytes);
} catch (IOException e) {
throw new IllegalStateException("Unable to parse JSON.", e);
if (fallbackCharsetDecoder != null) {
logger.warn("Unable to parse JSON, fallback to use customized charset decoder.", e);
return fromJsonWithFallbackCharsetDecoder(bytes);
} else {
throw new IllegalStateException("Unable to parse JSON.", e);
}
}
}

/**
 * Decodes the bytes with the configured fallback error actions and parses the resulting
 * text as JSON. Must only be called when {@code fallbackCharsetDecoder} is not null.
 *
 * @param bytes the raw payload bytes
 * @return the parsed JSON tree
 * @throws IllegalStateException if decoding or parsing fails
 */
private static JsonNode fromJsonWithFallbackCharsetDecoder(byte[] bytes) {
    try {
        // CharsetDecoder is stateful and not safe for concurrent use, so the shared static
        // instance is only used as a template: each call decodes with its own decoder that
        // carries the same charset, error actions and replacement.
        CharsetDecoder decoder = fallbackCharsetDecoder.charset().newDecoder()
            .onMalformedInput(fallbackCharsetDecoder.malformedInputAction())
            .onUnmappableCharacter(fallbackCharsetDecoder.unmappableCharacterAction())
            .replaceWith(fallbackCharsetDecoder.replacement());

        String sanitizedJson = decoder.decode(ByteBuffer.wrap(bytes)).toString();
        return Utils.getSimpleObjectMapper().readTree(sanitizedJson);
    } catch (IOException e) {
        throw new IllegalStateException(
            String.format(
                "Unable to parse JSON with fallback charset decoder[OnMalformedInput %s, OnUnmappedCharacter %s]",
                Configs.getCharsetDecoderErrorActionOnMalformedInput(),
                Configs.getCharsetDecoderErrorActionOnUnmappedCharacter()),
            e);
    }
}

Expand All @@ -40,4 +71,45 @@ public int getResponsePayloadSize() {
public JsonNode getPayload() {
    // May be null: the constructor stores null when there were no readable bytes.
    return this.jsonValue;
}

/**
 * Builds the UTF-8 decoder used as a fallback when regular JSON parsing fails.
 * The fallback is enabled when at least one of the two error-action settings
 * (malformed input, unmapped character) is non-empty. Recognized values are
 * "REPLACE" and "IGNORE" (case-insensitive); any other value leaves the decoder's
 * default action in place and logs a warning.
 *
 * @return the configured decoder, or {@code null} when neither setting is configured
 */
private static CharsetDecoder getFallbackCharsetDecoder() {
    String malformedInputConfig = Configs.getCharsetDecoderErrorActionOnMalformedInput();
    String unmappedCharacterConfig = Configs.getCharsetDecoderErrorActionOnUnmappedCharacter();

    // Check BOTH settings: configuring only the unmapped-character action must also
    // enable the fallback decoder.
    if (StringUtil.isNullOrEmpty(malformedInputConfig)
        && StringUtil.isNullOrEmpty(unmappedCharacterConfig)) {
        logger.debug("No fallback charset decoder is enabled");
        return null;
    }

    CharsetDecoder charsetDecoder = StandardCharsets.UTF_8.newDecoder();

    // config coding error action for malformed input
    CodingErrorAction malformedInputAction = parseCodingErrorAction(malformedInputConfig);
    if (malformedInputAction != null) {
        charsetDecoder.onMalformedInput(malformedInputAction);
    } else {
        logger.warn(
            "Will use default error action for malformed input config {}",
            malformedInputConfig);
    }

    // config coding error action for unmapped character
    CodingErrorAction unmappedCharacterAction = parseCodingErrorAction(unmappedCharacterConfig);
    if (unmappedCharacterAction != null) {
        charsetDecoder.onUnmappableCharacter(unmappedCharacterAction);
    } else {
        logger.warn(
            "Will use default error action for unmapped character config {}",
            unmappedCharacterConfig);
    }

    return charsetDecoder;
}

/**
 * Maps a configured action name to a {@link CodingErrorAction}.
 *
 * @param configuredAction the configured value, may be null or empty
 * @return REPLACE or IGNORE for the matching name, or {@code null} when the value is not recognized
 */
private static CodingErrorAction parseCodingErrorAction(String configuredAction) {
    if (configuredAction == null) {
        return null;
    }

    // Locale.ROOT keeps the comparison stable regardless of the JVM default locale
    // (e.g. the Turkish dotted/dotless 'i' would otherwise break "ignore").
    switch (configuredAction.toUpperCase(java.util.Locale.ROOT)) {
        case "REPLACE":
            return CodingErrorAction.REPLACE;
        case "IGNORE":
            return CodingErrorAction.IGNORE;
        default:
            return null;
    }
}
}

0 comments on commit 2ad2942

Please sign in to comment.