diff --git a/src/jmh/java/dev/blaauwendraad/masker/json/ValueMaskerBenchmark.java b/src/jmh/java/dev/blaauwendraad/masker/json/ValueMaskerBenchmark.java index ed7e2381..4a775329 100644 --- a/src/jmh/java/dev/blaauwendraad/masker/json/ValueMaskerBenchmark.java +++ b/src/jmh/java/dev/blaauwendraad/masker/json/ValueMaskerBenchmark.java @@ -38,7 +38,8 @@ public static class State { .maskKeys(Set.of("targetKey")) .build() ); - private final JsonMasker functionalMasker = JsonMasker.getMasker(JsonMaskingConfig.builder() + + private final JsonMasker rawValueMasker = JsonMasker.getMasker(JsonMaskingConfig.builder() .maskKeys(Set.of("targetKey")) .maskStringsWith(ValueMaskers.withRawValueFunction(value -> "\"***\"")) .maskNumbersWith(ValueMaskers.withRawValueFunction(value -> "\"###\"")) @@ -46,6 +47,14 @@ public static class State { .build() ); + private final JsonMasker textValueMasker = JsonMasker.getMasker(JsonMaskingConfig.builder() + .maskKeys(Set.of("targetKey")) + .maskStringsWith(ValueMaskers.withTextFunction(value -> "***")) + .maskNumbersWith(ValueMaskers.withTextFunction(value -> "###")) + .maskBooleansWith(ValueMaskers.withTextFunction(value -> "&&&")) + .build() + ); + private byte[] jsonBytes; @Setup @@ -61,7 +70,12 @@ public void maskWithStatic(State state) { } @Benchmark - public void maskWithFunctional(State state) { - state.functionalMasker.mask(state.jsonBytes); + public void maskWithRawValueFunction(State state) { + state.rawValueMasker.mask(state.jsonBytes); + } + + @Benchmark + public void maskWithTextValueFunction(State state) { + state.textValueMasker.mask(state.jsonBytes); } } diff --git a/src/main/java/dev/blaauwendraad/masker/json/InvalidJsonException.java b/src/main/java/dev/blaauwendraad/masker/json/InvalidJsonException.java index 431418e3..81817392 100644 --- a/src/main/java/dev/blaauwendraad/masker/json/InvalidJsonException.java +++ b/src/main/java/dev/blaauwendraad/masker/json/InvalidJsonException.java @@ -13,4 +13,8 @@ public class 
InvalidJsonException extends RuntimeException { public InvalidJsonException(String message, Throwable cause) { super(message, cause); } + + public InvalidJsonException(String message) { + super(message); + } } diff --git a/src/main/java/dev/blaauwendraad/masker/json/MaskingState.java b/src/main/java/dev/blaauwendraad/masker/json/MaskingState.java index be257376..4aaae10b 100644 --- a/src/main/java/dev/blaauwendraad/masker/json/MaskingState.java +++ b/src/main/java/dev/blaauwendraad/masker/json/MaskingState.java @@ -238,6 +238,12 @@ public String asString(int fromIndex, int length) { return new String(message, offset + fromIndex, length, StandardCharsets.UTF_8); } + @Override + public InvalidJsonException invalidJson(String message, int index) { + int offset = getCurrentValueStartIndex(); + return new InvalidJsonException("%s at index %s".formatted(message, offset + index)); + } + private void checkCurrentValueBounds(int index) { if (index < 0 || index >= byteLength()) { throw new IndexOutOfBoundsException("Index " + index + " is out of bounds for value of length " + byteLength()); diff --git a/src/main/java/dev/blaauwendraad/masker/json/ValueMaskerContext.java b/src/main/java/dev/blaauwendraad/masker/json/ValueMaskerContext.java index 7b0f6864..0f332349 100644 --- a/src/main/java/dev/blaauwendraad/masker/json/ValueMaskerContext.java +++ b/src/main/java/dev/blaauwendraad/masker/json/ValueMaskerContext.java @@ -58,4 +58,14 @@ public interface ValueMaskerContext { * Note: this INCLUDES the opening and closing quotes for string values */ String asString(int fromIndex, int length); + + /** + * Create an {@link InvalidJsonException} with the given message and index relative to the value (i.e. an index + * between {@code 0} and {@link ValueMaskerContext#byteLength()}). 
+ * + * @param message error message + * @param index relative index where the JSON contains invalid sequence + * @return the exception to be thrown + */ + InvalidJsonException invalidJson(String message, int index); } diff --git a/src/main/java/dev/blaauwendraad/masker/json/ValueMaskers.java b/src/main/java/dev/blaauwendraad/masker/json/ValueMaskers.java index 078ac86e..2b1b8d1e 100644 --- a/src/main/java/dev/blaauwendraad/masker/json/ValueMaskers.java +++ b/src/main/java/dev/blaauwendraad/masker/json/ValueMaskers.java @@ -1,5 +1,8 @@ package dev.blaauwendraad.masker.json; +import dev.blaauwendraad.masker.json.config.KeyMaskingConfig; +import dev.blaauwendraad.masker.json.util.Utf8Util; + import java.nio.charset.StandardCharsets; import java.util.function.Function; @@ -148,8 +151,8 @@ public static ValueMasker.NumberMasker eachDigitWith(String value) { * Does not mask a target value (no-operation). Can be used if certain JSON value types do not * need to be masked, for example, not masking booleans or numbers. * - * @see dev.blaauwendraad.masker.json.config.KeyMaskingConfig.Builder#maskBooleansWith(ValueMasker.BooleanMasker) - * @see dev.blaauwendraad.masker.json.config.KeyMaskingConfig.Builder#maskNumbersWith(ValueMasker.NumberMasker) + * @see KeyMaskingConfig.Builder#maskBooleansWith(ValueMasker.BooleanMasker) + * @see KeyMaskingConfig.Builder#maskNumbersWith(ValueMasker.NumberMasker) */ public static ValueMasker.AnyValueMasker noop() { return describe("", context -> { @@ -198,10 +201,17 @@ public static ValueMasker.StringMasker email(int keepPrefixLength, int keepSuffi /** * Masks a target value with the provided {@link Function}. The target value (as raw JSON literal) is passed into * the function as a string regardless of the JSON type (string, numeric or a boolean). 
In case the target value is - * a JSON string the value the function will receive a JSON encoded value with the quotes as it appears in the JSON - * with line breaks encoded as \n, special characters like " or \ escaped with a backslash (\). + * a JSON string, the function will receive the JSON encoded value as it appears in the JSON, including + * the opening and closing quotes, with escaped control characters (e.g. {@code \n}, + * {@code \t}, etc.), quotation marks ({@code "}), the escape character itself ({@code \}), and unicode-encoded + * characters ({@code \}{@code uXXXX}). + * + *

Consequently, the return value of the provided function must be a valid JSON encoded literal (of any + * JSON type), otherwise the masking will result in an invalid JSON. + * If the return value is {@code null}, the target value will be replaced with {@code null} JSON literal. * - *

Consequently, the return value of the provided function must be a valid JSON encoded literal (of any JSON type), otherwise the masking will result in an invalid JSON. + *

It is strongly advised to use the equivalent function {@link ValueMaskers#withTextFunction(Function)} + * which operates on decoded string values and can never produce invalid JSON. * *

The table below contains a couple examples for the masking * @@ -216,7 +226,15 @@ public static ValueMasker.StringMasker email(int keepPrefixLength, int keepSuffi * * + * + * *
{@code { "maskMe": "a ***" }} *
{@code { "maskMe": 12345 }} - * {@code value -> value.startsWith(123) ? "0" : value} + * {@code value -> value.startsWith("123") ? "0" : value} + * {@code { "maskMe": 0 }} + *
{@code { "maskMe": "12345" }} + * {@code value -> value.startsWith("123") ? "0" : value} + * {@code { "maskMe": "12345" }} + *
{@code { "maskMe": "12345" }} + * {@code value -> value.startsWith("\"123") ? "0" : value} * {@code { "maskMe": 0 }} *
{@code { "maskMe": "secret" }} @@ -246,8 +264,10 @@ public static ValueMasker.StringMasker email(int keepPrefixLength, int keepSuffi * *

Note: usually the {@link ValueMasker} operates on a byte level without parsing JSON values * into intermediate objects. This implementation, however, needs to allocate a {@link String} - * before passing it into the function and then turn it back into a byte array for the replacement, which introduces - * some performance overhead. + * before passing it into the function and then turn it back into a byte array for the replacement, + * which introduces some performance overhead. + * + * @see ValueMaskers#withTextFunction(Function) */ public static ValueMasker.AnyValueMasker withRawValueFunction(Function masker) { return describe( @@ -258,7 +278,188 @@ public static ValueMasker.AnyValueMasker withRawValueFunction(FunctionA non-null return value of the provided function will be encoded into a JSON string regardless of the + * JSON type of the original value. Any character that MUST be escaped (as per RFC 8259, section 7) will be escaped. + * Characters that MAY be escaped (as per RFC 8259) WILL NOT be escaped. + * If the return value is {@code null}, the target value will be replaced with {@code null} JSON literal. + * + *

The table below contains a couple examples for the masking + * + * + * + * + * + * + * + * + * + * + * + * + *
Examples of using withTextFunction
Input JSONFunctionMasked JSON
{@code { "maskMe": "a secret" }} + * {@code value -> value.replaceAll("secret", "***")} + * {@code { "maskMe": "a ***" }} + *
{@code { "maskMe": 12345 }} + * {@code value -> value.startsWith("123") ? "0" : value} + * {@code { "maskMe": "0" }} + *
{@code { "maskMe": 12345 }} + * {@code value -> value} + * {@code { "maskMe": "12345" }} + *
{@code { "maskMe": "secret" }} + * {@code value -> "***"} + * {@code { "maskMe": "***" }} + *
{@code { "maskMe": "secret value" }} + * {@code value -> value.substring(0, 3) + "***"} + * {@code { "maskMe": "sec***" }} + *
{@code { "maskMe": "Andrii \"Juice\" Pilshchykov" }} + * {@code value -> value.replaceAll("\"", "(quote)")} + * {@code { "maskMe": "Andrii (quote)Juice(quote) Pilshchykov" }} + *
+ * + *

Note: in all other cases, the {@link ValueMasker} operates on a byte level without parsing JSON values into + * intermediate objects. This implementation, however, needs to allocate a {@link String} before passing it to + * the {@link Function} and then turn it back into a byte array for the replacement, which introduces some + * performance overhead. + */ + public static ValueMasker.AnyValueMasker withTextFunction(Function<String, String> masker) { + return describe( + "withTextFunction (%s)".formatted(masker), + context -> { + String decodedValue; // the original value decoded + if (context.getByte(0) != '"') { + // deals with JSON numbers, booleans, and null + decodedValue = context.asString(0, context.byteLength()); + } else { + // deals with JSON strings + int encodedIndex = 1; // skip opening quote of the JSON string + int valueEndIndex = context.byteLength() - 1; // minus the closing quote + int decodedIndex = 0; // next write position in decodedBytes + // the length of decodedBytes is guaranteed to be lower or equal to the length + // of the encoded bytes sequence: + // 1. for every escaped character (2 bytes), the output is the character without escape - 1 byte + // 2. for every unicode encoded character (6 bytes), the output character is within 1-3 bytes + // 3. 
for the pair of unicode encoded surrogates (12 bytes), the output is always 4 bytes + // which means that if any escape is present, the decodedBytes will have some null characters + // at the tail, which are cut from the resulting string + byte[] decodedBytes = new byte[context.byteLength()]; + while (encodedIndex < valueEndIndex) { + byte originalByte = context.getByte(encodedIndex++); + // a backslash starts an escape sequence, any other byte is copied as is + if (originalByte != '\\') { + decodedBytes[decodedIndex++] = originalByte; // unescaped characters are already decoded + } else { + originalByte = context.getByte(encodedIndex++); + switch (originalByte) { + case 'b' -> decodedBytes[decodedIndex++] = '\b'; + case 't' -> decodedBytes[decodedIndex++] = '\t'; + case 'n' -> decodedBytes[decodedIndex++] = '\n'; + case 'f' -> decodedBytes[decodedIndex++] = '\f'; + case 'r' -> decodedBytes[decodedIndex++] = '\r'; + case '"', '/', '\\' -> decodedBytes[decodedIndex++] = originalByte; + case 'u' -> { + // decode the hexadecimal encoded unicode character into its UTF-8 bytes + int valueStartIndex = encodedIndex - 2; + try { + char unicodeHexBytesAsChar = Utf8Util.unicodeHexToChar( + context.getByte(encodedIndex++), + context.getByte(encodedIndex++), + context.getByte(encodedIndex++), + context.getByte(encodedIndex++) + ); + if (unicodeHexBytesAsChar < 0x80) { + // < 128 (in decimal) fits in 7 bits which is 1 byte of data in UTF-8 + decodedBytes[decodedIndex++] = (byte) unicodeHexBytesAsChar; + } else if (unicodeHexBytesAsChar < 0x800) { // 2048 in decimal + // < 2048 (in decimal) fits in 11 bits which is 2 bytes of data in UTF-8 + decodedBytes[decodedIndex++] = (byte) (0xc0 | (unicodeHexBytesAsChar >> 6)); + decodedBytes[decodedIndex++] = (byte) (0x80 | (unicodeHexBytesAsChar & 0x3f)); + } else if (Character.isSurrogate(unicodeHexBytesAsChar)) { + // decoding non-BMP characters in UTF-16 using a pair of high and low + // surrogates which together form one unicode character. 
+ int codePoint = -1; + if (Character.isHighSurrogate(unicodeHexBytesAsChar) // first surrogate must be the high surrogate + && encodedIndex < context.byteLength() - 6 /* -6 for all bytes of + the byte encoded unicode character (\\u + 4 hex bytes) to prevent possible ArrayIndexOutOfBoundsExceptions */ + && context.getByte(encodedIndex) == '\\' // the high surrogate must be followed by a low surrogate (starting with \\u) + && context.getByte(encodedIndex + 1) == 'u' + ) { + encodedIndex += 2; // step over the '\' and 'u' + char lowSurrogate = Utf8Util.unicodeHexToChar( + context.getByte(encodedIndex++), + context.getByte(encodedIndex++), + context.getByte(encodedIndex++), + context.getByte(encodedIndex++) + ); + if (Character.isLowSurrogate(lowSurrogate)) { + codePoint = Character.toCodePoint(unicodeHexBytesAsChar, lowSurrogate); + } + } + if (codePoint < 0) { + // default String behaviour is to replace invalid surrogate pairs + // with the character '?', but from the JSON perspective, + // it's better to throw an InvalidJsonException + throw context.invalidJson("Invalid surrogate pair '%s'" + .formatted(context.asString(valueStartIndex, encodedIndex - valueStartIndex)), valueStartIndex); + } else { + decodedBytes[decodedIndex++] = (byte) (0xf0 | (codePoint >> 18)); + decodedBytes[decodedIndex++] = (byte) (0x80 | ((codePoint >> 12) & 0x3f)); + decodedBytes[decodedIndex++] = (byte) (0x80 | ((codePoint >> 6) & 0x3f)); + decodedBytes[decodedIndex++] = (byte) (0x80 | (codePoint & 0x3f)); + } + } else { + // dealing with characters with values between 2048 and 65536 which + // equals to 2^16 or 16 bits, which is 3 bytes of data in UTF-8 encoding + decodedBytes[decodedIndex++] = (byte) (0xe0 | (unicodeHexBytesAsChar >> 12)); + decodedBytes[decodedIndex++] = (byte) (0x80 | ((unicodeHexBytesAsChar >> 6) & 0x3f)); + decodedBytes[decodedIndex++] = (byte) (0x80 | (unicodeHexBytesAsChar & 0x3f)); + } + } catch (IllegalArgumentException e) { + throw 
context.invalidJson(e.getMessage(), valueStartIndex); + } + } + default -> throw context.invalidJson("Unexpected character after '\\': '%s'".formatted((char) originalByte), encodedIndex); + } + } + } + decodedValue = new String(decodedBytes, 0, decodedIndex, StandardCharsets.UTF_8); + } + String maskedValue = masker.apply(decodedValue); + if (maskedValue == null) { + maskedValue = "null"; + } else { + StringBuilder encoded = new StringBuilder(maskedValue.length() + 2); + encoded.append("\""); // opening quote of the encoded string + for (int i = 0; i < maskedValue.length(); i++) { + char character = maskedValue.charAt(i); + // escape everything that MUST be escaped per RFC 8259 (section 7): the quote, the backslash and all + // control characters (U+0000 - U+001F); other unicode characters are not transformed into \ u form + switch (character) { + case '\b' -> encoded.append("\\b"); + case '\t' -> encoded.append("\\t"); + case '\n' -> encoded.append("\\n"); + case '\f' -> encoded.append("\\f"); + case '\r' -> encoded.append("\\r"); + case '"', '\\' -> encoded.append("\\").append(character); + default -> encoded.append(character < 0x20 ? "\\u%04x".formatted((int) character) : String.valueOf(character)); + } + } + encoded.append("\""); // closing quote of the encoded string + maskedValue = encoded.toString(); + } + byte[] replacementBytes = maskedValue.getBytes(StandardCharsets.UTF_8); + context.replaceBytes(0, context.byteLength(), replacementBytes, 1); }); } } diff --git a/src/main/java/dev/blaauwendraad/masker/json/util/Utf8Util.java b/src/main/java/dev/blaauwendraad/masker/json/util/Utf8Util.java index 840aab6e..95250255 100644 --- a/src/main/java/dev/blaauwendraad/masker/json/util/Utf8Util.java +++ b/src/main/java/dev/blaauwendraad/masker/json/util/Utf8Util.java @@ -31,6 +31,39 @@ public static int getCodePointByteLength(byte input) { throw new IllegalArgumentException("Input byte is not using UTF-8 encoding"); } + /** + * Converts the 4 hexadecimal digits of a unicode escape sequence ('\u0000'), given as ASCII bytes, into a char. + * Each byte MUST represent a valid HEX character, i.e. + *

    + *
  • in range from {@code 48} ({@code '0'}) to {@code 57} ({@code '9'}) + *
  • in range from {@code 65} ({@code 'A'}) to {@code 70} ({@code 'F'}) + *
  • in range from {@code 97} ({@code 'a'}) to {@code 102} ({@code 'f'}) + *
+ */ + public static char unicodeHexToChar(byte b1, byte b2, byte b3, byte b4) { + int value = Character.digit(validateHex(b1), 16); + // each hex digit is converted into a value in range 0 - 15 (4 bits), so the accumulated + // value is shifted left by 4 bits before the next digit is added + value = (value << 4) | Character.digit(validateHex(b2), 16); + value = (value << 4) | Character.digit(validateHex(b3), 16); + value = (value << 4) | Character.digit(validateHex(b4), 16); + return (char) value; + } + + private static byte validateHex(byte hexByte) { + if (hexByte >= 48 && hexByte <= 57) { + return hexByte; // a digit from 0 to 9 + } + if (hexByte >= 65 && hexByte <= 70) { + return hexByte; // a character from A to F + } + if (hexByte >= 97 && hexByte <= 102) { + return hexByte; // a character from a to f + } + throw new IllegalArgumentException("Invalid hex character '%s'".formatted((char) hexByte)); + + } + /** * Counts the number of non-visible characters inside the string. The intervals provided must be * within a single string as this method will not do boundary checks or terminate at the end of diff --git a/src/test/java/dev/blaauwendraad/masker/json/KeyMatcherTest.java b/src/test/java/dev/blaauwendraad/masker/json/KeyMatcherTest.java index efaa8c24..7b90daec 100644 --- a/src/test/java/dev/blaauwendraad/masker/json/KeyMatcherTest.java +++ b/src/test/java/dev/blaauwendraad/masker/json/KeyMatcherTest.java @@ -171,7 +171,7 @@ void shouldMatchJsonPathArrays() { @Test void shouldNotMatchPrefix() { - KeyMatcher keyMatcher = new KeyMatcher(JsonMaskingConfig.builder().maskKeys(Set.of("maskMe")).build()); + KeyMatcher keyMatcher = new KeyMatcher(JsonMaskingConfig.builder().maskKeys(Set.of("maskMe", "test")).build()); assertThatConfig(keyMatcher, "mask").isNull(); assertThatConfig(keyMatcher, "maskMe").isNotNull(); } diff --git a/src/test/java/dev/blaauwendraad/masker/json/MaskingStateTest.java b/src/test/java/dev/blaauwendraad/masker/json/MaskingStateTest.java index 8957891d..0c762066 100644 --- 
a/src/test/java/dev/blaauwendraad/masker/json/MaskingStateTest.java +++ b/src/test/java/dev/blaauwendraad/masker/json/MaskingStateTest.java @@ -1,9 +1,11 @@ package dev.blaauwendraad.masker.json; +import dev.blaauwendraad.masker.json.config.JsonMaskingConfig; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; import java.nio.charset.StandardCharsets; +import java.util.Set; class MaskingStateTest { @Test @@ -56,4 +58,37 @@ void getCurrentJsonPathNodeFromEmptyJsonPath() { MaskingState maskingState = new MaskingState("[]".getBytes(StandardCharsets.UTF_8), true); Assertions.assertThat(maskingState.getCurrentJsonPathNode()).isNull(); } + + @Test + void shouldThrowErrorWhenGettingStartValueIndexOutsideOfMasking() { + MaskingState maskingState = new MaskingState(""" + { + "maskMe": "some value" + } + """.getBytes(StandardCharsets.UTF_8), false); + + Assertions.assertThatThrownBy(() -> maskingState.getCurrentValueStartIndex()) + .isInstanceOf(IllegalStateException.class); + } + + @Test + void shouldUseCorrectOffsetWhenThrowingValueMaskerError() { + var jsonMasker = JsonMasker.getMasker(JsonMaskingConfig.builder() + .maskKeys(Set.of("maskMe")) + .maskStringsWith(context -> { + throw context.invalidJson("Didn't like the value at index 3", 3); + }) + .build() + ); + + Assertions.assertThatThrownBy(() -> + jsonMasker.mask(""" + { + "maskMe": "some value" + } + """ + )) + .isInstanceOf(InvalidJsonException.class) + .hasMessage("Didn't like the value at index 3 at index 19"); + } } \ No newline at end of file diff --git a/src/test/java/dev/blaauwendraad/masker/json/ValueMaskersTest.java b/src/test/java/dev/blaauwendraad/masker/json/ValueMaskersTest.java index ff828076..ae07b626 100644 --- a/src/test/java/dev/blaauwendraad/masker/json/ValueMaskersTest.java +++ b/src/test/java/dev/blaauwendraad/masker/json/ValueMaskersTest.java @@ -4,9 +4,13 @@ import dev.blaauwendraad.masker.json.util.ByteValueMaskerContext; import org.assertj.core.api.Assertions; import 
org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; import javax.annotation.Nonnull; import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.stream.Stream; class ValueMaskersTest { @Test @@ -82,6 +86,11 @@ void eachDigitWithInteger() { Assertions.assertThat(ByteValueMaskerContext.maskNumberWith(12345, valueMasker)) .isEqualTo("11111"); + + Assertions.assertThatThrownBy(() -> ValueMaskers.eachDigitWith(0)) + .isInstanceOf(IllegalArgumentException.class); + Assertions.assertThatThrownBy(() -> ValueMaskers.eachDigitWith(10)) + .isInstanceOf(IllegalArgumentException.class); } @Test @@ -160,6 +169,12 @@ void withRawValueFunction() { if (value.startsWith("\"secret:")) { return "\"***\""; } + if (value.startsWith("23")) { + return "\"###\""; + } + if (value.equals("false")) { + return "\"&&&\""; + } return value; }); @@ -169,8 +184,169 @@ void withRawValueFunction() { .isEqualTo("\"***\""); Assertions.assertThat(ByteValueMaskerContext.maskNumberWith(12345, valueMasker)) .isEqualTo("12345"); + Assertions.assertThat(ByteValueMaskerContext.maskNumberWith(23456, valueMasker)) + .isEqualTo("\"###\""); Assertions.assertThat(ByteValueMaskerContext.maskBooleanWith(true, valueMasker)) .isEqualTo("true"); + Assertions.assertThat(ByteValueMaskerContext.maskBooleanWith(false, valueMasker)) + .isEqualTo("\"&&&\""); + Assertions.assertThat(ByteValueMaskerContext.maskBooleanWith(true, ValueMaskers.withRawValueFunction(value -> null))) + .isEqualTo("null"); + } + + @Test + void withTextFunction() { + var valueMasker = ValueMaskers.withTextFunction(value -> { + if (value.startsWith("secret:")) { + return "***"; + } + if (value.startsWith("23")) { + return "###"; + } + if (value.equals("false")) { + return "&&&"; + } + return value; + }); + + Assertions.assertThat(ByteValueMaskerContext.maskStringWith("not a secret", valueMasker)) + .isEqualTo("\"not a secret\""); + 
Assertions.assertThat(ByteValueMaskerContext.maskStringWith("secret: very much", valueMasker)) + .isEqualTo("\"***\""); + Assertions.assertThat(ByteValueMaskerContext.maskStringWith("", valueMasker)) + .isEqualTo("\"\""); + Assertions.assertThat(ByteValueMaskerContext.maskNumberWith(12345, valueMasker)) + .isEqualTo("\"12345\""); + Assertions.assertThat(ByteValueMaskerContext.maskNumberWith(23456, valueMasker)) + .isEqualTo("\"###\""); + Assertions.assertThat(ByteValueMaskerContext.maskBooleanWith(true, valueMasker)) + .isEqualTo("\"true\""); + Assertions.assertThat(ByteValueMaskerContext.maskBooleanWith(false, valueMasker)) + .isEqualTo("\"&&&\""); + Assertions.assertThat(ByteValueMaskerContext.maskBooleanWith(true, ValueMaskers.withTextFunction(value -> null))) + .isEqualTo("null"); + } + + @Test + void withTextFunctionEscapedCharacters() { + String jsonEncoded = "\\b\\t\\n\\f\\r\\\"\\\\"; + Assertions.assertThat(ByteValueMaskerContext.maskStringWith(jsonEncoded, ValueMaskers.withTextFunction(value -> { + Assertions.assertThat(value).isEqualTo("\b\t\n\f\r\"\\"); + return value; + }))).isEqualTo("\"" + jsonEncoded + "\""); // needs to be escaped exactly like input + + String forwardSlash = "\\/"; + Assertions.assertThat(ByteValueMaskerContext.maskStringWith(forwardSlash, ValueMaskers.withTextFunction(value -> { + Assertions.assertThat(value).isEqualTo("/"); + return value; + }))).isEqualTo("\"/\""); // does not need to be escaped + } + + private static Stream> unicodeCharacters() { + // equivalent pairs of unicode characters: actual character (expected), JSON-escaped, Java-escaped + return Stream.of( + List.of("a", "\\u0061", "\u0061"), // 1-byte character + List.of("ƒ", "\\u0192", "\u0192"), // 2-byte character + List.of("€", "\\u20AC", "\u20AC"), // 3-byte character + List.of("䀀", "\\u4000", "\u4000"), // 3-byte character + List.of("𐍈", "\\uD800\\uDF48", "\uD800\uDF48"), // 4-byte character + List.of("𠜎", "\\uD841\\uDF0E", "\uD841\uDF0E"), // 4-byte character + 
List.of("💩", "\\uD83D\\uDCA9", "\uD83D\uDCA9") // 4-byte character + ); + } + + @ParameterizedTest + @MethodSource("unicodeCharacters") + void withTextFunctionUnicodeEncoded(List characters) { + String expected = characters.get(0); + // equivalent pairs of unicode characters: JSON-escaped, Java-escaped, and actual character + for (String unicodeCharacter : characters) { + // single value + Assertions.assertThat(ByteValueMaskerContext.maskStringWith(unicodeCharacter, ValueMaskers.withTextFunction(value -> { + Assertions.assertThat(value).isEqualTo(expected); + return value; + }))).isEqualTo("\"" + expected + "\""); + + // lowercase hex digits are explicitly allowed by the JSON specification (RFC 8259, section 7), + // i.e. \\u20AC and \\u20ac both decode to the same value € + Assertions.assertThat(ByteValueMaskerContext.maskStringWith(unicodeCharacter.toLowerCase(), ValueMaskers.withTextFunction(value -> { + Assertions.assertThat(value).isEqualTo(expected); + return value; + }))).isEqualTo("\"" + expected + "\""); + + // double value + Assertions.assertThat(ByteValueMaskerContext.maskStringWith(unicodeCharacter + unicodeCharacter, ValueMaskers.withTextFunction(value -> { + Assertions.assertThat(value).isEqualTo(expected + expected); + return value; + }))).isEqualTo("\"" + expected + expected + "\""); + + // with prefix + Assertions.assertThat(ByteValueMaskerContext.maskStringWith("prefix" + unicodeCharacter, ValueMaskers.withTextFunction(value -> { + Assertions.assertThat(value).isEqualTo("prefix" + expected); + return value; + }))).isEqualTo("\"prefix" + expected + "\""); + + // with suffix + Assertions.assertThat(ByteValueMaskerContext.maskStringWith(unicodeCharacter + "suffix", ValueMaskers.withTextFunction(value -> { + Assertions.assertThat(value).isEqualTo(expected + "suffix"); + return value; + }))).isEqualTo("\"" + expected + "suffix\""); + + // with prefix and suffix + Assertions.assertThat(ByteValueMaskerContext.maskStringWith("prefix" + 
unicodeCharacter + "suffix", ValueMaskers.withTextFunction(value -> { + Assertions.assertThat(value).isEqualTo("prefix" + expected + "suffix"); + return value; + }))).isEqualTo("\"prefix" + expected + "suffix\""); + } + } + + @Test + void withTextFunctionInvalidEscape() { + ValueMasker.AnyValueMasker valueMasker = ValueMaskers.withTextFunction(value -> value); + + Assertions.assertThatThrownBy(() -> ByteValueMaskerContext.maskStringWith("\\z", valueMasker)) + .isInstanceOf(InvalidJsonException.class) + .hasMessage("Unexpected character after '\\': 'z' at index 3"); + } + + @Test + void withTextFunctionInvalidUnicode() { + ValueMasker.AnyValueMasker valueMasker = ValueMaskers.withTextFunction(value -> value); + + // high surrogate without low surrogate + Assertions.assertThatThrownBy(() -> ByteValueMaskerContext.maskStringWith("\\uD83D", valueMasker)) + .isInstanceOf(InvalidJsonException.class) + .hasMessage("Invalid surrogate pair '\\uD83D' at index 1"); + + // high surrogate followed by another high surrogate + Assertions.assertThatThrownBy(() -> ByteValueMaskerContext.maskStringWith("\\uD83D\\uD83D", valueMasker)) + .isInstanceOf(InvalidJsonException.class) + .hasMessage("Invalid surrogate pair '\\uD83D\\uD83D' at index 1"); + + // high surrogate without low surrogate but other suffix + Assertions.assertThatThrownBy(() -> ByteValueMaskerContext.maskStringWith("\\uD83Dsuffix", valueMasker)) + .isInstanceOf(InvalidJsonException.class) + .hasMessage("Invalid surrogate pair '\\uD83D' at index 1"); + + // high surrogate without low surrogate but an escape character + Assertions.assertThatThrownBy(() -> ByteValueMaskerContext.maskStringWith("\\uD83D\\n0000", valueMasker)) + .isInstanceOf(InvalidJsonException.class) + .hasMessage("Invalid surrogate pair '\\uD83D' at index 1"); + + // low surrogate without high surrogate + Assertions.assertThatThrownBy(() -> ByteValueMaskerContext.maskStringWith("\\uDCA9", valueMasker)) + .isInstanceOf(InvalidJsonException.class) + 
.hasMessage("Invalid surrogate pair '\\uDCA9' at index 1"); + + // low surrogate without high surrogate but other prefix + Assertions.assertThatThrownBy(() -> ByteValueMaskerContext.maskStringWith("prefix\\uDCA9", valueMasker)) + .isInstanceOf(InvalidJsonException.class) + .hasMessage("Invalid surrogate pair '\\uDCA9' at index 7"); + + // unicode escape contains characters that are not hexadecimal digits + Assertions.assertThatThrownBy(() -> ByteValueMaskerContext.maskStringWith("\\uXXXX", valueMasker)) + .isInstanceOf(InvalidJsonException.class) + .hasMessage("Invalid hex character 'X' at index 1"); } @Test diff --git a/src/test/java/dev/blaauwendraad/masker/json/path/JsonPathTest.java b/src/test/java/dev/blaauwendraad/masker/json/path/JsonPathTest.java index edec9530..6dd9ebc9 100644 --- a/src/test/java/dev/blaauwendraad/masker/json/path/JsonPathTest.java +++ b/src/test/java/dev/blaauwendraad/masker/json/path/JsonPathTest.java @@ -17,4 +17,15 @@ void getLastSegmentForEmptyJsonPath() { Assertions.assertNull(jsonPath.getQueryArgument()); } + @Test + void shouldCheckSegmentsOnEquals() { + JsonPath a = new JsonPath(new String[]{"a", "b"}); + JsonPath b = new JsonPath(new String[]{"a", "b"}); + + Assertions.assertEquals(a, a); + Assertions.assertEquals(a, b); + Assertions.assertEquals(a.hashCode(), b.hashCode()); + Assertions.assertNotEquals(a, null); + } + } \ No newline at end of file diff --git a/src/test/java/dev/blaauwendraad/masker/json/util/ByteValueMaskerContext.java b/src/test/java/dev/blaauwendraad/masker/json/util/ByteValueMaskerContext.java index 82ae6b77..c9152d7f 100644 --- a/src/test/java/dev/blaauwendraad/masker/json/util/ByteValueMaskerContext.java +++ b/src/test/java/dev/blaauwendraad/masker/json/util/ByteValueMaskerContext.java @@ -1,5 +1,6 @@ package dev.blaauwendraad.masker.json.util; +import dev.blaauwendraad.masker.json.InvalidJsonException; import dev.blaauwendraad.masker.json.ValueMasker; import dev.blaauwendraad.masker.json.ValueMaskerContext; @@ -115,6 +116,11 @@ 
public String asString(int fromIndex, int length) { return new String(value, fromIndex, length, StandardCharsets.UTF_8); } + @Override + public InvalidJsonException invalidJson(String message, int index) { + return new InvalidJsonException("%s at index %s".formatted(message, index)); + } + public String getMaskedValue() { return new String(maskedValue, StandardCharsets.UTF_8); } diff --git a/src/test/java/dev/blaauwendraad/masker/json/util/Utf8UtilTest.java b/src/test/java/dev/blaauwendraad/masker/json/util/Utf8UtilTest.java index c38ec2c4..a05df327 100644 --- a/src/test/java/dev/blaauwendraad/masker/json/util/Utf8UtilTest.java +++ b/src/test/java/dev/blaauwendraad/masker/json/util/Utf8UtilTest.java @@ -31,6 +31,35 @@ void nonUtf8Byte() { .isThrownBy(() -> Utf8Util.getCodePointByteLength((byte) (b << 1))); } + @Test + void unicodeHexToChar() { + Assertions.assertThat(Utf8Util.unicodeHexToChar((byte) '0', (byte) '0', (byte) '2', (byte) '0')) + .isEqualTo(' '); + Assertions.assertThat(Utf8Util.unicodeHexToChar((byte) '0', (byte) '0', (byte) '3', (byte) '0')) + .isEqualTo('0'); + Assertions.assertThat(Utf8Util.unicodeHexToChar((byte) '0', (byte) '0', (byte) '4', (byte) '0')) + .isEqualTo('@'); + } + + @Test + void unicodeHexToCharInvalid() { + Assertions.assertThatThrownBy(() -> Utf8Util.unicodeHexToChar((byte) 35, (byte) '0', (byte) '2', (byte) '0')) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid hex character '#'"); + + Assertions.assertThatThrownBy(() -> Utf8Util.unicodeHexToChar((byte) 61, (byte) '0', (byte) '2', (byte) '0')) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid hex character '='"); + + Assertions.assertThatThrownBy(() -> Utf8Util.unicodeHexToChar((byte) 71, (byte) '0', (byte) '2', (byte) '0')) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid hex character 'G'"); + + Assertions.assertThatThrownBy(() -> Utf8Util.unicodeHexToChar((byte) 103, (byte) '0', (byte) '2', (byte) '0')) + 
.isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid hex character 'g'"); + } + @ParameterizedTest @MethodSource("unicodeCharactersLength") void unicodeCharacters(String character, int utf8ByteLength) {