Skip to content

Commit

Permalink
Added regex benchmark, split benchmarks into baseline and json-masker…
Browse files Browse the repository at this point in the history
…, added parameter with character set to test how well json-masker is handling resizes (#23)
  • Loading branch information
gavlyukovskiy authored Dec 22, 2023
1 parent c01f44e commit ddfd279
Show file tree
Hide file tree
Showing 4 changed files with 228 additions and 56 deletions.
88 changes: 88 additions & 0 deletions src/jmh/benchmark-history/2023-12-20.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
- OS: Windows 11
- CPU: AMD Ryzen 5 7600X
- Java: 17.0.7

```text
Benchmark (characters) (jsonSize) (maskedKeyProbability) (obfuscationLength) Mode Cnt Score Error Units
BaselineBenchmark.countBytes N/A 1kb N/A N/A thrpt 4074057.577 ops/s
BaselineBenchmark.countBytes N/A 128kb N/A N/A thrpt 33992.898 ops/s
BaselineBenchmark.countBytes N/A 2mb N/A N/A thrpt 2253.498 ops/s
BaselineBenchmark.jacksonParseAndMask N/A 1kb N/A N/A thrpt 195683.419 ops/s
BaselineBenchmark.jacksonParseAndMask N/A 128kb N/A N/A thrpt 722.508 ops/s
BaselineBenchmark.jacksonParseAndMask N/A 2mb N/A N/A thrpt 27.133 ops/s
BaselineBenchmark.regexReplace N/A 1kb N/A N/A thrpt 11729.500 ops/s
BaselineBenchmark.regexReplace N/A 128kb N/A N/A thrpt 74.242 ops/s
BaselineBenchmark.regexReplace N/A 2mb N/A N/A thrpt 5.408 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 1kb 0.01 none thrpt 2672694.020 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 1kb 0.01 8 thrpt 838428.960 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 1kb 0.1 none thrpt 2025416.416 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 1kb 0.1 8 thrpt 1653016.050 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 128kb 0.01 none thrpt 12053.279 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 128kb 0.01 8 thrpt 5361.050 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 128kb 0.1 none thrpt 9459.132 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 128kb 0.1 8 thrpt 337.606 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 2mb 0.01 none thrpt 487.307 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 2mb 0.01 8 thrpt 12.569 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 2mb 0.1 none thrpt 431.350 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 2mb 0.1 8 thrpt 1.966 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii 1kb 0.01 none thrpt 2819386.235 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii 1kb 0.01 8 thrpt 2502055.170 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii 1kb 0.1 none thrpt 2541085.908 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii 1kb 0.1 8 thrpt 1486612.366 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii 128kb 0.01 none thrpt 12533.878 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii 128kb 0.01 8 thrpt 4054.251 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii 128kb 0.1 none thrpt 8758.617 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii 128kb 0.1 8 thrpt 473.697 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii 2mb 0.01 none thrpt 454.732 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii 2mb 0.01 8 thrpt 13.287 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii 2mb 0.1 none thrpt 14.704 ops/s
JsonMaskerBenchmark.jsonMaskerBytes ascii 2mb 0.1 8 thrpt 1.795 ops/s
JsonMaskerBenchmark.jsonMaskerBytes unicode 1kb 0.01 none thrpt 3579539.318 ops/s
JsonMaskerBenchmark.jsonMaskerBytes unicode 1kb 0.01 8 thrpt 3591563.062 ops/s
JsonMaskerBenchmark.jsonMaskerBytes unicode 1kb 0.1 none thrpt 3436922.834 ops/s
JsonMaskerBenchmark.jsonMaskerBytes unicode 1kb 0.1 8 thrpt 3688532.991 ops/s
JsonMaskerBenchmark.jsonMaskerBytes unicode 128kb 0.01 none thrpt 14381.218 ops/s
JsonMaskerBenchmark.jsonMaskerBytes unicode 128kb 0.01 8 thrpt 2160.292 ops/s
JsonMaskerBenchmark.jsonMaskerBytes unicode 128kb 0.1 none thrpt 9132.054 ops/s
JsonMaskerBenchmark.jsonMaskerBytes unicode 128kb 0.1 8 thrpt 417.713 ops/s
JsonMaskerBenchmark.jsonMaskerBytes unicode 2mb 0.01 none thrpt 17.061 ops/s
JsonMaskerBenchmark.jsonMaskerBytes unicode 2mb 0.01 8 thrpt 19.017 ops/s
JsonMaskerBenchmark.jsonMaskerBytes unicode 2mb 0.1 none thrpt 2.855 ops/s
JsonMaskerBenchmark.jsonMaskerBytes unicode 2mb 0.1 8 thrpt 2.490 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 1kb 0.01 none thrpt 1559147.328 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 1kb 0.01 8 thrpt 1720395.410 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 1kb 0.1 none thrpt 1340913.940 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 1kb 0.1 8 thrpt 726683.288 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 128kb 0.01 none thrpt 9707.708 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 128kb 0.01 8 thrpt 2644.963 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 128kb 0.1 none thrpt 8718.749 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 128kb 0.1 8 thrpt 538.299 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 2mb 0.01 none thrpt 400.015 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 2mb 0.01 8 thrpt 16.719 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 2mb 0.1 none thrpt 347.794 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 2mb 0.1 8 thrpt 1.727 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii 1kb 0.01 none thrpt 1691838.470 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii 1kb 0.01 8 thrpt 1577028.930 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii 1kb 0.1 none thrpt 935689.291 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii 1kb 0.1 8 thrpt 472768.967 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii 128kb 0.01 none thrpt 8974.417 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii 128kb 0.01 8 thrpt 1963.733 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii 128kb 0.1 none thrpt 1533.492 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii 128kb 0.1 8 thrpt 425.324 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii 2mb 0.01 none thrpt 72.243 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii 2mb 0.01 8 thrpt 18.094 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii 2mb 0.1 none thrpt 10.938 ops/s
JsonMaskerBenchmark.jsonMaskerString ascii 2mb 0.1 8 thrpt 1.873 ops/s
JsonMaskerBenchmark.jsonMaskerString unicode 1kb 0.01 none thrpt 592610.914 ops/s
JsonMaskerBenchmark.jsonMaskerString unicode 1kb 0.01 8 thrpt 632087.084 ops/s
JsonMaskerBenchmark.jsonMaskerString unicode 1kb 0.1 none thrpt 507522.810 ops/s
JsonMaskerBenchmark.jsonMaskerString unicode 1kb 0.1 8 thrpt 579923.646 ops/s
JsonMaskerBenchmark.jsonMaskerString unicode 128kb 0.01 none thrpt 1454.359 ops/s
JsonMaskerBenchmark.jsonMaskerString unicode 128kb 0.01 8 thrpt 1426.964 ops/s
JsonMaskerBenchmark.jsonMaskerString unicode 128kb 0.1 none thrpt 546.959 ops/s
JsonMaskerBenchmark.jsonMaskerString unicode 128kb 0.1 8 thrpt 348.036 ops/s
JsonMaskerBenchmark.jsonMaskerString unicode 2mb 0.01 none thrpt 17.853 ops/s
JsonMaskerBenchmark.jsonMaskerString unicode 2mb 0.01 8 thrpt 23.375 ops/s
JsonMaskerBenchmark.jsonMaskerString unicode 2mb 0.1 none thrpt 2.752 ops/s
JsonMaskerBenchmark.jsonMaskerString unicode 2mb 0.1 8 thrpt 2.885 ops/s
```
88 changes: 88 additions & 0 deletions src/jmh/java/dev/blaauwendraad/masker/json/BaselineBenchmark.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package dev.blaauwendraad.masker.json;

import com.fasterxml.jackson.databind.ObjectMapper;
import dev.blaauwendraad.masker.json.config.JsonMaskingConfig;
import org.openjdk.jmh.annotations.*;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Reference benchmarks that run against the same randomly generated JSON input:
 * a plain byte scan, a regex based replacement and a parse-and-mask pass that is
 * handed a Jackson {@link ObjectMapper}.
 */
@Warmup(iterations = 1, time = 3)
@Fork(value = 1)
@Measurement(iterations = 1, time = 3)
@OutputTimeUnit(TimeUnit.SECONDS)
@BenchmarkMode(Mode.Throughput)
public class BaselineBenchmark {

    /**
     * Benchmark input (JMH {@code Scope.Thread}): the generated JSON as a string and as UTF-8 bytes,
     * the set of keys to mask, and the helpers (mapper, compiled patterns) the benchmarks use.
     */
    @org.openjdk.jmh.annotations.State(Scope.Thread)
    public static class State {
        @Param({"1kb", "128kb", "2mb"})
        String jsonSize;
        @Param({"unicode"})
        String characters;
        @Param({"0.01"})
        double maskedKeyProbability;

        private Set<String> targetKeys;
        private String jsonString;
        private byte[] jsonBytes;
        private ObjectMapper objectMapper;
        private List<Pattern> regexList;

        /**
         * Generates the JSON document for the current parameter combination and prepares
         * one case-insensitive pattern per target key.
         */
        @Setup
        public synchronized void setup() {
            objectMapper = new ObjectMapper();

            targetKeys = BenchmarkUtils.getTargetKeys(20);
            jsonString = BenchmarkUtils.randomJson(targetKeys, jsonSize, characters, maskedKeyProbability);
            jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);

            regexList = targetKeys.stream()
                    .map(key -> {
                        // Group 1 is the quoted key up to and including the colon, group 2 the value.
                        // The value part is a rough approximation: it is not meant to handle objects
                        // or arrays, which is acceptable for a baseline comparison.
                        String regex = "(\"" + key + "\"\\s*:\\s*)(\"?[^\"]*\"?)";
                        return Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
                    })
                    .collect(Collectors.toList());
        }
    }

    /**
     * Adds up every byte of the JSON input.
     *
     * @param state prepared benchmark input
     * @return the sum of all bytes, returned so the loop has an observable result
     */
    @Benchmark
    public int countBytes(State state) {
        int total = 0;
        for (byte current : state.jsonBytes) {
            total += current;
        }
        return total;
    }

    /**
     * Masks the values of all target keys by applying each prepared pattern in turn.
     *
     * @param state prepared benchmark input
     * @return the JSON text with every matched value replaced by asterisks
     */
    @Benchmark
    public String regexReplace(State state) {
        String result = state.jsonString;
        for (Pattern keyPattern : state.regexList) {
            Matcher keyMatcher = keyPattern.matcher(result);
            if (!keyMatcher.find()) {
                // Key does not occur in the current text, nothing to replace.
                continue;
            }
            result = keyMatcher.replaceAll(match -> {
                String keyPart = match.group(1);
                String rawValue = match.group(2);
                int starCount = rawValue.length();
                if (rawValue.startsWith("\"")) {
                    // Do not count the two surrounding quotes of a string value.
                    starCount -= 2;
                }
                return keyPart + "*".repeat(starCount);
            });
        }
        return result;
    }

    /**
     * Masks the JSON input via {@code ParseAndMaskUtil.mask} in {@code MASK} mode and
     * converts the result back to a string.
     *
     * @param state prepared benchmark input
     * @return string form of the masked result
     * @throws IOException declared because the underlying utility call may raise it
     */
    @Benchmark
    public String jacksonParseAndMask(State state) throws IOException {
        return ParseAndMaskUtil
                .mask(state.jsonString, state.targetKeys, JsonMaskingConfig.TargetKeyMode.MASK, state.objectMapper)
                .toString();
    }
}
41 changes: 41 additions & 0 deletions src/jmh/java/dev/blaauwendraad/masker/json/BenchmarkUtils.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
package dev.blaauwendraad.masker.json;

import randomgen.json.RandomJsonGenerator;
import randomgen.json.RandomJsonGeneratorConfig;

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static randomgen.json.JsonStringCharacters.*;
import static randomgen.json.JsonStringCharacters.getRandomPrintableUnicodeCharacters;

public class BenchmarkUtils {

Expand All @@ -25,4 +34,36 @@ public static int parseSize(String size) {
}
return sizeBytes;
}

/**
 * Builds the set of key names used as masking targets in the benchmarks.
 *
 * @param count number of keys to create; values of zero or less yield an empty set
 * @return a mutable set containing {@code someSecret0} up to {@code someSecret(count - 1)}
 */
public static Set<String> getTargetKeys(int count) {
    Set<String> keys = new HashSet<>();
    int index = 0;
    while (index < count) {
        keys.add("someSecret" + index);
        index++;
    }
    return keys;
}

/**
 * Generates a random JSON document as a string.
 *
 * @param targetKeys          keys handed to the generator as target keys
 * @param jsonSize            textual size, converted with {@code parseSize} and passed as target size in bytes
 * @param characters          name of the character set: {@code "ascii (no quote)"}, {@code "ascii"} or {@code "unicode"}
 * @param targetKeyPercentage value forwarded to the generator's target key percentage setting
 * @return string form of the generated JSON node
 * @throws IllegalArgumentException if {@code characters} is not one of the supported names
 */
public static String randomJson(Set<String> targetKeys, String jsonSize, String characters, double targetKeyPercentage) {
    // Resolve the character set first so that an invalid name fails before the generator is configured.
    Set<Character> charset = allowedCharactersFor(characters);

    RandomJsonGeneratorConfig generatorConfig = RandomJsonGeneratorConfig.builder()
            .setAllowedCharacters(charset)
            .setTargetKeys(targetKeys)
            .setTargetKeyPercentage(targetKeyPercentage)
            .setTargetJsonSizeBytes(BenchmarkUtils.parseSize(jsonSize))
            .createConfig();

    RandomJsonGenerator generator = new RandomJsonGenerator(generatorConfig);
    return generator.createRandomJsonNode().toString();
}

/** Maps the benchmark's character set name onto the set of characters the generator may use. */
private static Set<Character> allowedCharactersFor(String characters) {
    return switch (characters) {
        case "ascii" -> getPrintableAsciiCharacters();
        case "ascii (no quote)" -> getPrintableAsciiCharacters()
                .stream()
                .filter(ch -> ch != '"')
                .collect(Collectors.toSet());
        case "unicode" -> mergeCharSets(
                getPrintableAsciiCharacters(),
                getUnicodeControlCharacters(),
                getRandomPrintableUnicodeCharacters()
        );
        default -> throw new IllegalArgumentException("Invalid characters param: " + characters + ", must be one of: 'ascii (no quote)', 'ascii', 'unicode'");
    };
}
}
Loading

0 comments on commit ddfd279

Please sign in to comment.