Skip to content

Commit

Permalink
Implement input/output streaming support (#163)
Browse files Browse the repository at this point in the history
Implement streaming support

---------

Co-authored-by: breus <[email protected]>
  • Loading branch information
donavdey and Breus authored Sep 22, 2024
1 parent 2458b81 commit ba6154a
Show file tree
Hide file tree
Showing 37 changed files with 1,001 additions and 245 deletions.
39 changes: 29 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,22 @@
[![Sonar Coverage](https://img.shields.io/sonar/coverage/Breus_json-masker?server=https%3A%2F%2Fsonarcloud.io&color=appveyor&style=flat-square)](https://sonarcloud.io/project/overview?id=Breus_json-masker)
[![Sonar Tests](https://img.shields.io/sonar/total_tests/Breus_json-masker?server=https%3A%2F%2Fsonarcloud.io&style=flat-square)](https://sonarcloud.io/project/overview?id=Breus_json-masker)

JSON masker library which can be used to mask (sensitive) values inside JSON corresponding to a set of keys (**block-mode**)
or, alternatively, allow only specific values to be unmasked corresponding to a set of keys while all others are
masked (**allow-mode**).
JSON Masker library allows for highly flexible masking of sensitive data in JSON, supporting two modes:

The library provides modern and convenient Java APIs which offers a wide range of masking customizations.
Furthermore, the implementation is focused on maximizing the throughput and minimizing heap memory allocations to minimize
GC pressure.
* Block Mode: Mask values corresponding to a specified set of keys.
* Allow Mode: Unmask only the values corresponding to specified keys, while masking all others.

Finally, no additional third-party runtime dependencies are required to use this library.
The library provides modern and convenient Java APIs, offering extensive masking customizations. It includes both
streaming and in-memory APIs to cater to various use cases.

The library is designed for high throughput and efficient memory usage; it minimizes heap allocations to reduce GC
pressure.

Finally, no additional third-party runtime dependencies are required to use this library.

## Features

* Mask a user-provided stream of JSON and write it to a user-provided output stream
* Mask all primitive values by specifying the keys to mask; by default, any `string` is masked as `"***"`, any `number`
as `"###"` and any `boolean` as `"&&&"`
* If the value of a targeted key corresponds to an `object`, all nested fields, including nested arrays and objects will
Expand Down Expand Up @@ -331,6 +335,23 @@ String maskedJson = jsonMasker.mask(json);
}
```

### Masking with the streaming API
To mask (potentially) large JSON input, the streaming API can be used.

All features of the JsonMasker work exactly the same for the streaming API and the (default) in-memory API.

#### Usage
```java
var jsonMasker = JsonMasker.getMasker(
JsonMaskingConfig.builder()
.maskKeys(Set.of("email", "iban"))
.build()
);

jsonMasker.mask(jsonInputStream, jsonOutputStream);
```


### Masking with JSONPath

To have more control over the nesting, JSONPath can be used to specify the keys that need to be masked (allowed).
Expand Down Expand Up @@ -730,9 +751,7 @@ String maskedJson = jsonMasker.mask(json);

## Dependencies

* **The library has no third-party runtime dependencies**
* The library only has a single JSR-305 compilation dependency for nullability annotations
* The test/benchmark dependencies for this library are listed in the `build.gradle`
**The library has no third-party runtime dependencies**

## Performance

Expand Down
16 changes: 16 additions & 0 deletions src/jmh/java/dev/blaauwendraad/masker/json/BaselineBenchmark.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,14 @@
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;

import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -54,6 +58,11 @@ public synchronized void setup() {
.map(key -> Pattern.compile("(\"" + key + "\"\\s*:\\s*)(\"?[^\"]*\"?)", Pattern.CASE_INSENSITIVE))
.toList();
}

/**
 * Removes the {@code file.json} scratch file from the working directory, if it is present.
 *
 * @throws IOException if the file exists but cannot be deleted
 */
@TearDown
public synchronized void tearDown() throws IOException {
    Path scratchFile = Path.of("file.json");
    Files.deleteIfExists(scratchFile);
}
}

@Benchmark
Expand All @@ -65,6 +74,13 @@ public int countBytes(State state) {
return sum;
}

/**
 * Baseline benchmark: writes the state's JSON string to {@code file.json}.
 *
 * <p>The charset is passed explicitly so the bytes written do not depend on the platform-default
 * encoding (which is not guaranteed to be UTF-8 on JDKs before 18).
 *
 * @param state benchmark state providing the JSON string to write
 * @throws IOException if the file cannot be opened or written
 */
@Benchmark
public void writeFile(State state) throws IOException {
    try (FileWriter fileWriter = new FileWriter("file.json", StandardCharsets.UTF_8)) {
        fileWriter.write(state.jsonString);
    }
}

@Benchmark
public String regexReplace(State state) {
String masked = state.jsonString;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import org.openjdk.jmh.annotations.Warmup;
import dev.blaauwendraad.masker.json.util.JsonPathTestUtils;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.Set;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -66,4 +68,9 @@ public String jsonMaskerString(State state) {
public byte[] jsonMaskerBytes(State state) {
return state.jsonMasker.mask(state.jsonBytes);
}

/**
 * Benchmarks the stream-based {@code mask} overload using in-memory byte array streams:
 * the state's JSON bytes are the input and a fresh {@link ByteArrayOutputStream} is the output.
 *
 * @param state benchmark state providing the masker and the JSON bytes
 */
@Benchmark
public void jsonMaskerByteArrayStreams(State state) {
    ByteArrayInputStream source = new ByteArrayInputStream(state.jsonBytes);
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    state.jsonMasker.mask(source, sink);
}
}
105 changes: 105 additions & 0 deletions src/jmh/java/dev/blaauwendraad/masker/json/StreamTypeBenchmark.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package dev.blaauwendraad.masker.json;

import dev.blaauwendraad.masker.json.config.JsonMaskingConfig;
import dev.blaauwendraad.masker.json.util.JsonPathTestUtils;
import org.jspecify.annotations.NullUnmarked;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Set;
import java.util.concurrent.TimeUnit;

/**
 * Benchmarks the stream-based {@code JsonMasker.mask(InputStream, OutputStream)} API for every
 * combination of input and output stream kind (in-memory byte array streams and file streams).
 */
@Warmup(iterations = 1, time = 3)
@Fork(value = 1)
@Measurement(iterations = 1, time = 3)
@OutputTimeUnit(TimeUnit.SECONDS)
@BenchmarkMode(Mode.Throughput)
public class StreamTypeBenchmark {

    /** File (relative to the working directory) read by the "FileStream" input variant. */
    static final String INPUT_FILE_STREAM_NAME = "input.json";
    /** File (relative to the working directory) written by the "FileStream" output variant. */
    static final String OUTPUT_FILE_STREAM_NAME = "output.json";

    /** Per-thread benchmark state: the JSON payload, the stream kinds to use and the masker. */
    @org.openjdk.jmh.annotations.State(Scope.Thread)
    @NullUnmarked
    public static class State {
        @Param({"ByteArrayStream", "FileStream"})
        String streamInputType;
        @Param({"ByteArrayStream", "FileStream"})
        String streamOutputType;
        @Param({"10mb"})
        String jsonSize;

        // UTF-8 encoded JSON payload; also the exact content of the input file
        byte[] json;

        private JsonMasker jsonMasker;

        /**
         * Generates the JSON payload, writes it to the input file and builds the masker.
         *
         * @throws IOException if the input file cannot be written
         */
        @Setup
        public synchronized void setup() throws IOException {
            // prepare a json
            Set<String> targetKeys = BenchmarkUtils.getTargetKeys(20);
            json = BenchmarkUtils.randomJson(targetKeys, jsonSize, "unicode", 0.1).getBytes(StandardCharsets.UTF_8);

            // prepare an input file for FileStreams: write the raw bytes so the file content is
            // byte-for-byte identical to the in-memory payload regardless of the platform charset
            Files.write(Path.of(INPUT_FILE_STREAM_NAME), json);

            // create a masker; the payload is decoded once for JSONPath key generation
            String jsonText = new String(json, StandardCharsets.UTF_8);
            JsonMaskingConfig.Builder builder = JsonMaskingConfig.builder();
            builder.maskJsonPaths(JsonPathTestUtils.transformToJsonPathKeys(targetKeys, jsonText));

            jsonMasker = JsonMasker.getMasker(builder.build());
        }

        /**
         * Deletes the input and output files created for the file stream variants, if present.
         *
         * @throws IOException if an existing file cannot be deleted
         */
        @TearDown
        public synchronized void tearDown() throws IOException {
            Files.deleteIfExists(Path.of(INPUT_FILE_STREAM_NAME));
            Files.deleteIfExists(Path.of(OUTPUT_FILE_STREAM_NAME));
        }
    }

    /**
     * Creates the input stream for the requested stream kind.
     *
     * @param json            payload backing the in-memory variant
     * @param inputStreamType either {@code "ByteArrayStream"} or {@code "FileStream"}
     * @return a new input stream; the caller is responsible for closing it
     * @throws IOException              if the input file cannot be opened
     * @throws IllegalArgumentException if the stream kind is not recognised
     */
    private static InputStream createInputStream(byte[] json, String inputStreamType) throws IOException {
        return switch (inputStreamType) {
            case "ByteArrayStream" -> new ByteArrayInputStream(json);
            case "FileStream" -> new FileInputStream(INPUT_FILE_STREAM_NAME);
            default -> throw new IllegalArgumentException("Unknown stream type");
        };
    }

    /**
     * Creates the output stream for the requested stream kind.
     *
     * @param outputStreamType either {@code "ByteArrayStream"} or {@code "FileStream"}
     * @return a new output stream; the caller is responsible for closing it
     * @throws IOException              if the output file cannot be opened
     * @throws IllegalArgumentException if the stream kind is not recognised
     */
    private static OutputStream createOutputStream(String outputStreamType) throws IOException {
        return switch (outputStreamType) {
            case "ByteArrayStream" -> new ByteArrayOutputStream();
            case "FileStream" -> new FileOutputStream(OUTPUT_FILE_STREAM_NAME);
            default -> throw new IllegalArgumentException("Unknown stream type");
        };
    }

    /**
     * Masks the payload from a freshly opened input stream into a freshly opened output stream.
     * Both streams are closed when the invocation finishes.
     *
     * @param state benchmark state providing the payload, stream kinds and masker
     * @throws IOException if a stream cannot be opened or closed
     */
    @Benchmark
    public void jsonMaskerStreams(State state) throws IOException {
        try (InputStream inputStream = createInputStream(state.json, state.streamInputType);
             OutputStream outputStream = createOutputStream(state.streamOutputType)) {
            state.jsonMasker.mask(inputStream, outputStream);
        }
    }
}
14 changes: 14 additions & 0 deletions src/main/java/dev/blaauwendraad/masker/json/JsonMasker.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import dev.blaauwendraad.masker.json.config.JsonMaskingConfig;

import java.io.InputStream;
import java.io.OutputStream;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.util.Set;

Expand Down Expand Up @@ -39,6 +42,17 @@ static JsonMasker getMasker(JsonMaskingConfig maskingConfig) {
*/
byte[] mask(byte[] input);

/**
* Masks the given JSON input stream and writes the result into the output stream.
* <p>
* NOTE(review): this contract does not state whether the method closes either stream; presumably the caller
* remains responsible for closing both - confirm against the implementation.
*
* @param inputStream the JSON input stream to read from
* @param outputStream the output stream the masked JSON is written to
* @throws InvalidJsonException in case invalid JSON input was provided
* @throws UncheckedIOException if an I/O error occurs while reading from the input stream or writing to the output
* stream
*/
void mask(InputStream inputStream, OutputStream outputStream);

/**
* Masks the given JSON input and returns the masked output.
*
Expand Down
50 changes: 37 additions & 13 deletions src/main/java/dev/blaauwendraad/masker/json/KeyContainsMasker.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import dev.blaauwendraad.masker.json.util.AsciiJsonUtil;
import org.jspecify.annotations.Nullable;

import java.io.InputStream;
import java.io.OutputStream;

/**
* Default implementation of the {@link JsonMasker}.
*/
Expand All @@ -16,8 +19,9 @@ final class KeyContainsMasker implements JsonMasker {
private final KeyMatcher keyMatcher;
/**
* The masking configuration for the JSON masking process.
* Package private for unit tests.
*/
private final JsonMaskingConfig maskingConfig;
final JsonMaskingConfig maskingConfig;

/**
* Creates an instance of an {@link KeyContainsMasker}
Expand All @@ -39,9 +43,26 @@ final class KeyContainsMasker implements JsonMasker {
*/
@Override
public byte[] mask(byte[] input) {
try {
MaskingState maskingState = new MaskingState(input, !maskingConfig.getTargetJsonPaths().isEmpty());
MaskingState maskingState = new MaskingState(input, !maskingConfig.getTargetJsonPaths().isEmpty());
return mask(maskingState);
}

/**
* Runs the masker in streaming mode.
* A {@link MaskingState} is created over the provided input and output streams and handed to the shared masking
* routine. The buffer size given to that state is taken from {@code maskingConfig.bufferSize()}.
* NOTE(review): this comment previously stated a fixed chunk size of 8192 bytes - presumably that is the
* default buffer size; confirm in {@link JsonMaskingConfig}.
* JSONPath tracking is enabled on the state only when the configuration contains target JSONPaths.
*
* @param inputStream the JSON input stream
* @param outputStream masked JSON output stream
*/
@Override
public void mask(InputStream inputStream, OutputStream outputStream) {
MaskingState maskingState = new MaskingState(inputStream, outputStream, !maskingConfig.getTargetJsonPaths().isEmpty(), maskingConfig.bufferSize());
// the byte[] returned by the shared routine is deliberately ignored here
// (presumably the masked output has already been written to the output stream - TODO confirm)
mask(maskingState);
}

private byte[] mask(MaskingState maskingState) {
try {
KeyMaskingConfig keyMaskingConfig = maskingConfig.isInAllowMode() ? maskingConfig.getDefaultConfig() : null;
if (maskingState.jsonPathEnabled()) {
maskingState.expandCurrentJsonPath(keyMatcher.getJsonPathRootNode());
Expand All @@ -54,6 +75,7 @@ public byte[] mask(byte[] input) {
maskingState.next();
}
}
maskingState.flushCurrentBuffer();

return maskingState.flushReplacementOperations();
} catch (ArrayIndexOutOfBoundsException | StackOverflowError e) {
Expand Down Expand Up @@ -163,15 +185,17 @@ private void visitObject(MaskingState maskingState, @Nullable KeyMaskingConfig p
break;
}
// In case target keys should be considered as allow list, we need to NOT mask certain keys
int openingQuoteIndex = maskingState.currentIndex();
maskingState.registerTokenStartIndex();

stepOverStringValue(maskingState);

int keyStartIndex = maskingState.getCurrentTokenStartIndex();
int afterClosingQuoteIndex = maskingState.currentIndex();
int keyLength = afterClosingQuoteIndex - openingQuoteIndex - 2; // minus the opening and closing quotes
maskingState.expandCurrentJsonPath(keyMatcher.traverseJsonPathSegment(maskingState.getMessage(), maskingState.getCurrentJsonPathNode(), openingQuoteIndex + 1, keyLength));
KeyMaskingConfig keyMaskingConfig = keyMatcher.getMaskConfigIfMatched(maskingState.getMessage(), openingQuoteIndex + 1, // plus one for the opening quote
int keyLength = afterClosingQuoteIndex - keyStartIndex - 2; // minus the opening and closing quotes
maskingState.expandCurrentJsonPath(keyMatcher.traverseJsonPathSegment(maskingState.getMessage(), maskingState.getCurrentJsonPathNode(), keyStartIndex + 1, keyLength));
KeyMaskingConfig keyMaskingConfig = keyMatcher.getMaskConfigIfMatched(maskingState.getMessage(), keyStartIndex + 1, // plus one for the opening quote
keyLength, maskingState.getCurrentJsonPathNode());
maskingState.clearTokenStartIndex();
stepOverWhitespaceCharacters(maskingState);
// step over the colon ':'
maskingState.next();
Expand Down Expand Up @@ -217,12 +241,12 @@ private void visitObject(MaskingState maskingState, @Nullable KeyMaskingConfig p
* @param keyMaskingConfig the {@link KeyMaskingConfig} for the corresponding JSON key
*/
private void maskString(MaskingState maskingState, KeyMaskingConfig keyMaskingConfig) {
maskingState.registerValueStartIndex();
maskingState.registerTokenStartIndex();
stepOverStringValue(maskingState);

keyMaskingConfig.getStringValueMasker().maskValue(maskingState);

maskingState.clearValueStartIndex();
maskingState.clearTokenStartIndex();
}

/**
Expand All @@ -238,12 +262,12 @@ private void maskString(MaskingState maskingState, KeyMaskingConfig keyMaskingCo
*/
private void maskNumber(MaskingState maskingState, KeyMaskingConfig keyMaskingConfig) {
// This block deals with numeric values
maskingState.registerValueStartIndex();
maskingState.registerTokenStartIndex();
stepOverNumericValue(maskingState);

keyMaskingConfig.getNumberValueMasker().maskValue(maskingState);

maskingState.clearValueStartIndex();
maskingState.clearTokenStartIndex();
}

/**
Expand All @@ -255,12 +279,12 @@ private void maskNumber(MaskingState maskingState, KeyMaskingConfig keyMaskingCo
* @param keyMaskingConfig the {@link KeyMaskingConfig} for the corresponding JSON key
*/
private void maskBoolean(MaskingState maskingState, KeyMaskingConfig keyMaskingConfig) {
maskingState.registerValueStartIndex();
maskingState.registerTokenStartIndex();
maskingState.incrementIndex(AsciiCharacter.isLowercaseT(maskingState.byteAtCurrentIndex()) ? 4 : 5);

keyMaskingConfig.getBooleanValueMasker().maskValue(maskingState);

maskingState.clearValueStartIndex();
maskingState.clearTokenStartIndex();
}

/**
Expand Down
Loading

0 comments on commit ba6154a

Please sign in to comment.