Skip to content

Commit

Permalink
Add actual withTextFunction that decodes / encodes JSON values (#104)
Browse files Browse the repository at this point in the history
* Add actual withTextFunction that decodes / encodes JSON values

* Add benchmark for encoding function

* Refactor handling of all unicode characters, added tests, removed escaping of forward slash

* Throw exception instead of replacing with ?

* Added comment

* Added javadoc for invalidJson

* Added docs to withTextFunction, improved docs on `withRawValueFunction`

* Added a test case with empty input string

* Added a test case with two consecutive high surrogates

* Added a test case with high surrogate followed by an escape character

* Fix docs for replaceAll case

* Apply suggestions from code review

* Apply suggestions from code review

* Extract ValueMasker in test

* Not enough test coverage

* Extract ValueMasker in test

* Remove unnecessary space

* Fixed test case with escape character, 100% coverage, baby

* Small refactor for decoded index

* Update src/main/java/dev/blaauwendraad/masker/json/InvalidJsonException.java

* Added validation for hex byte conversion and docs

* Added tests for specific hex conversion to cover all branches

* More coverage

* Added a comment regarding decodedBytes

* Added some clarifying comments and inverted some conditions

---------

Co-authored-by: breus <[email protected]>
  • Loading branch information
gavlyukovskiy and Breus authored Apr 8, 2024
1 parent 12324ff commit c0e63df
Show file tree
Hide file tree
Showing 12 changed files with 538 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,23 @@ public static class State {
.maskKeys(Set.of("targetKey"))
.build()
);
private final JsonMasker functionalMasker = JsonMasker.getMasker(JsonMaskingConfig.builder()

// Benchmark subject for ValueMaskers.withRawValueFunction: every value found under "targetKey"
// is replaced by the literal returned from the function. The replacement literals carry their own
// escaped quotes because a raw-value function supplies the JSON text itself, quotes included.
private final JsonMasker rawValueMasker = JsonMasker.getMasker(JsonMaskingConfig.builder()
.maskKeys(Set.of("targetKey"))
.maskStringsWith(ValueMaskers.withRawValueFunction(value -> "\"***\""))
.maskNumbersWith(ValueMaskers.withRawValueFunction(value -> "\"###\""))
.maskBooleansWith(ValueMaskers.withRawValueFunction(value -> "\"&&&\""))
.build()
);

// Benchmark subject for ValueMaskers.withTextFunction: same target key as the raw-value masker,
// but the functions return plain text without surrounding quotes, so the benchmark also pays for
// the decoding / encoding step that the text function adds.
private final JsonMasker textValueMasker = JsonMasker.getMasker(JsonMaskingConfig.builder()
.maskKeys(Set.of("targetKey"))
.maskStringsWith(ValueMaskers.withTextFunction(value -> "***"))
.maskNumbersWith(ValueMaskers.withTextFunction(value -> "###"))
.maskBooleansWith(ValueMaskers.withTextFunction(value -> "&&&"))
.build()
);

private byte[] jsonBytes;

@Setup
Expand All @@ -61,7 +70,12 @@ public void maskWithStatic(State state) {
}

@Benchmark
public void maskWithFunctional(State state) {
state.functionalMasker.mask(state.jsonBytes);
public void maskWithRawValueFunction(State state) {
state.rawValueMasker.mask(state.jsonBytes);
}

/**
 * Measures masking of the prepared JSON bytes with the masker that is configured through
 * {@code ValueMaskers.withTextFunction}.
 *
 * @param state benchmark state that provides the configured masker and the input bytes
 */
@Benchmark
public void maskWithTextValueFunction(State state) {
    JsonMasker masker = state.textValueMasker;
    masker.mask(state.jsonBytes);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,8 @@ public class InvalidJsonException extends RuntimeException {
public InvalidJsonException(String message, Throwable cause) {
// pass the cause along so the original failure stays attached to this exception
super(message, cause);
}

/**
 * Creates the exception from a message only, for problems that are detected directly rather
 * than surfaced by another exception.
 *
 * @param message description of the problem
 */
public InvalidJsonException(String message) {
super(message);
}
}
6 changes: 6 additions & 0 deletions src/main/java/dev/blaauwendraad/masker/json/MaskingState.java
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,12 @@ public String asString(int fromIndex, int length) {
return new String(message, offset + fromIndex, length, StandardCharsets.UTF_8);
}

/**
 * Builds an {@link InvalidJsonException} for a problem found inside the value that is currently
 * being masked. The given index is relative to that value, so it is shifted by the value's start
 * index before being appended to the message.
 *
 * @param message description of the problem
 * @param index   position of the problem relative to the start of the current value
 * @return the exception, left for the caller to throw
 */
@Override
public InvalidJsonException invalidJson(String message, int index) {
    int shiftedIndex = getCurrentValueStartIndex() + index;
    return new InvalidJsonException(String.format("%s at index %s", message, shiftedIndex));
}

private void checkCurrentValueBounds(int index) {
if (index < 0 || index >= byteLength()) {
throw new IndexOutOfBoundsException("Index " + index + " is out of bounds for value of length " + byteLength());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,14 @@ public interface ValueMaskerContext {
* Note: this INCLUDES the opening and closing quotes for string values
*/
String asString(int fromIndex, int length);

/**
 * Create an {@link InvalidJsonException} with the given message and index relative to the value (i.e. an index
 * between {@code 0} and {@link ValueMaskerContext#byteLength()}).
 * <p>
 * The exception is only created and returned; it is up to the caller to throw it.
 *
 * @param message error message
 * @param index relative index where the JSON contains an invalid sequence
 * @return the exception to be thrown
 */
InvalidJsonException invalidJson(String message, int index);
}
219 changes: 210 additions & 9 deletions src/main/java/dev/blaauwendraad/masker/json/ValueMaskers.java

Large diffs are not rendered by default.

33 changes: 33 additions & 0 deletions src/main/java/dev/blaauwendraad/masker/json/util/Utf8Util.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,39 @@ public static int getCodePointByteLength(byte input) {
throw new IllegalArgumentException("Input byte is not using UTF-8 encoding");
}

/**
 * Combines the four hexadecimal digits of a JSON unicode escape (the part that follows the
 * backslash and the letter {@code u}) into the {@code char} they denote.
 * Every argument MUST be the ASCII code of a hexadecimal digit, i.e.
 * <ul>
 * <li>{@code '0'} to {@code '9'} (48 to 57)
 * <li>{@code 'A'} to {@code 'F'} (65 to 70)
 * <li>{@code 'a'} to {@code 'f'} (97 to 102)
 * </ul>
 *
 * @param b1 most significant hex digit
 * @param b2 second hex digit
 * @param b3 third hex digit
 * @param b4 least significant hex digit
 * @return the UTF-16 code unit described by the four digits
 * @throws IllegalArgumentException if any argument is not a hexadecimal digit; the digits are
 *                                  checked in order, so the first offending one is reported
 */
public static char unicodeHexToChar(byte b1, byte b2, byte b3, byte b4) {
    // every digit contributes a value from 0 to 15, i.e. exactly 4 bits, so two digits form one
    // octet and the two octets together form the 16-bit code unit
    int upperOctet = (Character.digit(validateHex(b1), 16) << 4) | Character.digit(validateHex(b2), 16);
    int lowerOctet = (Character.digit(validateHex(b3), 16) << 4) | Character.digit(validateHex(b4), 16);
    return (char) ((upperOctet << 8) | lowerOctet);
}

/**
 * Returns the given byte unchanged when it is the ASCII code of a hexadecimal digit.
 *
 * @param hexByte byte to check
 * @return {@code hexByte} itself
 * @throws IllegalArgumentException if the byte is not one of {@code 0-9}, {@code A-F} or {@code a-f}
 */
private static byte validateHex(byte hexByte) {
    boolean decimalDigit = hexByte >= '0' && hexByte <= '9';
    boolean upperCaseLetter = hexByte >= 'A' && hexByte <= 'F';
    boolean lowerCaseLetter = hexByte >= 'a' && hexByte <= 'f';
    if (!(decimalDigit || upperCaseLetter || lowerCaseLetter)) {
        throw new IllegalArgumentException("Invalid hex character '%s'".formatted((char) hexByte));
    }
    return hexByte;
}

/**
* Counts the number of non-visible characters inside the string. The intervals provided must be
* within a single string as this method will not do boundary checks or terminate at the end of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ void shouldMatchJsonPathArrays() {

@Test
void shouldNotMatchPrefix() {
KeyMatcher keyMatcher = new KeyMatcher(JsonMaskingConfig.builder().maskKeys(Set.of("maskMe")).build());
KeyMatcher keyMatcher = new KeyMatcher(JsonMaskingConfig.builder().maskKeys(Set.of("maskMe", "test")).build());
assertThatConfig(keyMatcher, "mask").isNull();
assertThatConfig(keyMatcher, "maskMe").isNotNull();
}
Expand Down
35 changes: 35 additions & 0 deletions src/test/java/dev/blaauwendraad/masker/json/MaskingStateTest.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package dev.blaauwendraad.masker.json;

import dev.blaauwendraad.masker.json.config.JsonMaskingConfig;
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.Test;

import java.nio.charset.StandardCharsets;
import java.util.Set;

class MaskingStateTest {
@Test
Expand Down Expand Up @@ -56,4 +58,37 @@ void getCurrentJsonPathNodeFromEmptyJsonPath() {
MaskingState maskingState = new MaskingState("[]".getBytes(StandardCharsets.UTF_8), true);
Assertions.assertThat(maskingState.getCurrentJsonPathNode()).isNull();
}

// The state is freshly constructed and no value is being masked yet, so asking for the start
// index of the "current value" is expected to fail with an IllegalStateException.
@Test
void shouldThrowErrorWhenGettingStartValueIndexOutsideOfMasking() {
MaskingState maskingState = new MaskingState("""
{
"maskMe": "some value"
}
""".getBytes(StandardCharsets.UTF_8), false);

Assertions.assertThatThrownBy(() -> maskingState.getCurrentValueStartIndex())
.isInstanceOf(IllegalStateException.class);
}

// Checks that an index passed to ValueMaskerContext#invalidJson, which is relative to the value,
// shows up in the exception message shifted by the position of that value in the whole document.
@Test
void shouldUseCorrectOffsetWhenThrowingValueMaskerError() {
var jsonMasker = JsonMasker.getMasker(JsonMaskingConfig.builder()
.maskKeys(Set.of("maskMe"))
.maskStringsWith(context -> {
// the masker itself reports the problem, using index 3 relative to the value
throw context.invalidJson("Didn't like the value at index 3", 3);
})
.build()
);

Assertions.assertThatThrownBy(() ->
jsonMasker.mask("""
{
"maskMe": "some value"
}
"""
))
.isInstanceOf(InvalidJsonException.class)
// the trailing "at index 19" is appended by invalidJson: start of the value in the document plus 3,
// so it depends on the exact whitespace of the JSON text above
.hasMessage("Didn't like the value at index 3 at index 19");
}
}
176 changes: 176 additions & 0 deletions src/test/java/dev/blaauwendraad/masker/json/ValueMaskersTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@
import dev.blaauwendraad.masker.json.util.ByteValueMaskerContext;
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;

import javax.annotation.Nonnull;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.stream.Stream;

class ValueMaskersTest {
@Test
Expand Down Expand Up @@ -82,6 +86,11 @@ void eachDigitWithInteger() {

Assertions.assertThat(ByteValueMaskerContext.maskNumberWith(12345, valueMasker))
.isEqualTo("11111");

Assertions.assertThatThrownBy(() -> ValueMaskers.eachDigitWith(0))
.isInstanceOf(IllegalArgumentException.class);
Assertions.assertThatThrownBy(() -> ValueMaskers.eachDigitWith(10))
.isInstanceOf(IllegalArgumentException.class);
}

@Test
Expand Down Expand Up @@ -160,6 +169,12 @@ void withRawValueFunction() {
if (value.startsWith("\"secret:")) {
return "\"***\"";
}
if (value.startsWith("23")) {
return "\"###\"";
}
if (value.equals("false")) {
return "\"&&&\"";
}
return value;
});

Expand All @@ -169,8 +184,169 @@ void withRawValueFunction() {
.isEqualTo("\"***\"");
Assertions.assertThat(ByteValueMaskerContext.maskNumberWith(12345, valueMasker))
.isEqualTo("12345");
Assertions.assertThat(ByteValueMaskerContext.maskNumberWith(23456, valueMasker))
.isEqualTo("\"###\"");
Assertions.assertThat(ByteValueMaskerContext.maskBooleanWith(true, valueMasker))
.isEqualTo("true");
Assertions.assertThat(ByteValueMaskerContext.maskBooleanWith(false, valueMasker))
.isEqualTo("\"&&&\"");
Assertions.assertThat(ByteValueMaskerContext.maskBooleanWith(true, ValueMaskers.withRawValueFunction(value -> null)))
.isEqualTo("null");
}

/**
 * Checks the basic contract of {@code ValueMaskers.withTextFunction}: the function sees the value
 * as plain text (no surrounding quotes), and whatever it returns is written back as a JSON string;
 * a {@code null} result is written as JSON {@code null}.
 */
@Test
void withTextFunction() {
    var valueMasker = ValueMaskers.withTextFunction(text ->
            text.startsWith("secret:") ? "***"
                    : text.startsWith("23") ? "###"
                    : text.equals("false") ? "&&&"
                    : text);

    // strings: returned as-is, replaced, and the empty string
    var plainString = ByteValueMaskerContext.maskStringWith("not a secret", valueMasker);
    Assertions.assertThat(plainString).isEqualTo("\"not a secret\"");
    var secretString = ByteValueMaskerContext.maskStringWith("secret: very much", valueMasker);
    Assertions.assertThat(secretString).isEqualTo("\"***\"");
    var emptyString = ByteValueMaskerContext.maskStringWith("", valueMasker);
    Assertions.assertThat(emptyString).isEqualTo("\"\"");

    // numbers: the text of the number comes back as a JSON string, replaced or not
    var plainNumber = ByteValueMaskerContext.maskNumberWith(12345, valueMasker);
    Assertions.assertThat(plainNumber).isEqualTo("\"12345\"");
    var replacedNumber = ByteValueMaskerContext.maskNumberWith(23456, valueMasker);
    Assertions.assertThat(replacedNumber).isEqualTo("\"###\"");

    // booleans: same treatment as numbers
    var plainBoolean = ByteValueMaskerContext.maskBooleanWith(true, valueMasker);
    Assertions.assertThat(plainBoolean).isEqualTo("\"true\"");
    var replacedBoolean = ByteValueMaskerContext.maskBooleanWith(false, valueMasker);
    Assertions.assertThat(replacedBoolean).isEqualTo("\"&&&\"");

    // a function that returns null produces a JSON null
    var nullMasker = ValueMaskers.withTextFunction(value -> null);
    Assertions.assertThat(ByteValueMaskerContext.maskBooleanWith(true, nullMasker))
            .isEqualTo("null");
}

/**
 * Checks the two-character JSON escapes: the function must receive the decoded control and quote
 * characters, and the output must escape them again. An escaped forward slash is decoded as well,
 * but is written back without the escape.
 */
@Test
void withTextFunctionEscapedCharacters() {
    String jsonEncoded = "\\b\\t\\n\\f\\r\\\"\\\\";
    var escapesMasker = ValueMaskers.withTextFunction(value -> {
        Assertions.assertThat(value).isEqualTo("\b\t\n\f\r\"\\");
        return value;
    });
    // the output has to be escaped exactly like the input
    Assertions.assertThat(ByteValueMaskerContext.maskStringWith(jsonEncoded, escapesMasker))
            .isEqualTo("\"" + jsonEncoded + "\"");

    String forwardSlash = "\\/";
    var slashMasker = ValueMaskers.withTextFunction(value -> {
        Assertions.assertThat(value).isEqualTo("/");
        return value;
    });
    // a forward slash does not have to be escaped on the way out
    Assertions.assertThat(ByteValueMaskerContext.maskStringWith(forwardSlash, slashMasker))
            .isEqualTo("\"/\"");
}

/**
 * Supplies the rows for the unicode decoding test. Every row lists three spellings of one
 * character, in this order: the actual character (the expected result), its JSON-escaped form and
 * its Java-escaped form. The byte counts in the names refer to the UTF-8 length of the character.
 */
private static Stream<List<String>> unicodeCharacters() {
    List<String> oneByte = List.of("a", "\\u0061", "\u0061");
    List<String> twoBytes = List.of("ƒ", "\\u0192", "\u0192");
    List<String> threeBytesEuro = List.of("€", "\\u20AC", "\u20AC");
    List<String> threeBytesCjk = List.of("䀀", "\\u4000", "\u4000");
    // the 4-byte characters need a surrogate pair when escaped
    List<String> fourBytesGothic = List.of("𐍈", "\\uD800\\uDF48", "\uD800\uDF48");
    List<String> fourBytesCjk = List.of("𠜎", "\\uD841\\uDF0E", "\uD841\uDF0E");
    List<String> fourBytesEmoji = List.of("💩", "\\uD83D\\uDCA9", "\uD83D\uDCA9");
    return Stream.of(
            oneByte,
            twoBytes,
            threeBytesEuro,
            threeBytesCjk,
            fourBytesGothic,
            fourBytesCjk,
            fourBytesEmoji
    );
}

/**
 * Checks that every spelling of a character (actual, JSON-escaped, Java-escaped) reaches the text
 * function as the actual character and is written back as the actual character, on its own as
 * well as repeated and surrounded by other text.
 *
 * @param characters one row from {@code unicodeCharacters()}; the first element is the actual
 *                   character, i.e. the expected decoded value
 */
@ParameterizedTest
@MethodSource("unicodeCharacters")
void withTextFunctionUnicodeEncoded(List<String> characters) {
    String expected = characters.get(0);
    for (String unicodeCharacter : characters) {
        // each row: the text handed to the masker, and the decoded text the function must observe
        List<List<String>> scenarios = List.of(
                // single value
                List.of(unicodeCharacter, expected),
                // lowercase hex digits: JSON permits hex digits of an escape in either case, so the
                // lowercased escape has to decode to the very same character
                List.of(unicodeCharacter.toLowerCase(), expected),
                // double value
                List.of(unicodeCharacter + unicodeCharacter, expected + expected),
                // with prefix
                List.of("prefix" + unicodeCharacter, "prefix" + expected),
                // with suffix
                List.of(unicodeCharacter + "suffix", expected + "suffix"),
                // with prefix and suffix
                List.of("prefix" + unicodeCharacter + "suffix", "prefix" + expected + "suffix")
        );

        for (List<String> scenario : scenarios) {
            String input = scenario.get(0);
            String decoded = scenario.get(1);
            var checkingMasker = ValueMaskers.withTextFunction(value -> {
                Assertions.assertThat(value).isEqualTo(decoded);
                return value;
            });
            Assertions.assertThat(ByteValueMaskerContext.maskStringWith(input, checkingMasker))
                    .isEqualTo("\"" + decoded + "\"");
        }
    }
}

/**
 * Checks that a backslash followed by a character that does not form a known escape is rejected
 * with an {@link InvalidJsonException} that names the character and its position.
 */
@Test
void withTextFunctionInvalidEscape() {
    ValueMasker.AnyValueMasker identityMasker = ValueMaskers.withTextFunction(value -> value);
    String unknownEscape = "\\z";

    Assertions.assertThatThrownBy(() -> ByteValueMaskerContext.maskStringWith(unknownEscape, identityMasker))
            .isInstanceOf(InvalidJsonException.class)
            .hasMessage("Unexpected character after '\\': 'z' at index 3");
}

/**
 * Checks that malformed unicode escapes are rejected with an {@link InvalidJsonException}:
 * surrogates that do not form a valid high/low pair, and escapes whose digits are not hexadecimal.
 * The expected message includes the position of the offending escape.
 */
@Test
void withTextFunctionInvalidUnicode() {
    ValueMasker.AnyValueMasker valueMasker = ValueMaskers.withTextFunction(value -> value);

    // each row: the invalid string value, and the message of the exception it must cause
    String[][] invalidValues = {
            // high surrogate without low surrogate
            {"\\uD83D", "Invalid surrogate pair '\\uD83D' at index 1"},
            // high surrogate followed by another high surrogate
            {"\\uD83D\\uD83D", "Invalid surrogate pair '\\uD83D\\uD83D' at index 1"},
            // high surrogate without low surrogate but other suffix
            {"\\uD83Dsuffix", "Invalid surrogate pair '\\uD83D' at index 1"},
            // high surrogate without low surrogate but an escape character
            {"\\uD83D\\n0000", "Invalid surrogate pair '\\uD83D' at index 1"},
            // low surrogate without high surrogate
            {"\\uDCA9", "Invalid surrogate pair '\\uDCA9' at index 1"},
            // low surrogate without high surrogate but other prefix
            {"prefix\\uDCA9", "Invalid surrogate pair '\\uDCA9' at index 7"},
            // escape whose four characters are not hexadecimal digits at all
            {"\\uXXXX", "Invalid hex character 'X' at index 1"},
    };

    for (String[] invalidValue : invalidValues) {
        String input = invalidValue[0];
        String expectedMessage = invalidValue[1];
        Assertions.assertThatThrownBy(() -> ByteValueMaskerContext.maskStringWith(input, valueMasker))
                .isInstanceOf(InvalidJsonException.class)
                .hasMessage(expectedMessage);
    }
}

@Test
Expand Down
Loading

0 comments on commit c0e63df

Please sign in to comment.