From ddfd2794ea69367b2151389069f8cc5760c3f0ed Mon Sep 17 00:00:00 2001 From: Arthur Gavlyukovskiy Date: Fri, 22 Dec 2023 12:30:51 +0100 Subject: [PATCH] Added regex benchmark, split benchmarks into baseline and json-masker, added parameter with character set to test how well json-masker is handling resizes (#23) --- src/jmh/benchmark-history/2023-12-20.md | 88 +++++++++++++++++++ .../masker/json/BaselineBenchmark.java | 88 +++++++++++++++++++ .../masker/json/BenchmarkUtils.java | 41 +++++++++ .../masker/json/JsonMaskerBenchmark.java | 67 +++----------- 4 files changed, 228 insertions(+), 56 deletions(-) create mode 100644 src/jmh/benchmark-history/2023-12-20.md create mode 100644 src/jmh/java/dev/blaauwendraad/masker/json/BaselineBenchmark.java diff --git a/src/jmh/benchmark-history/2023-12-20.md b/src/jmh/benchmark-history/2023-12-20.md new file mode 100644 index 00000000..a732e283 --- /dev/null +++ b/src/jmh/benchmark-history/2023-12-20.md @@ -0,0 +1,88 @@ +- OS: Windows 11 +- CPU: AMD Ryzen 5 7600X +- Java: 17.0.7 + +```text +Benchmark (characters) (jsonSize) (maskedKeyProbability) (obfuscationLength) Mode Cnt Score Error Units +BaselineBenchmark.countBytes N/A 1kb N/A N/A thrpt 4074057.577 ops/s +BaselineBenchmark.countBytes N/A 128kb N/A N/A thrpt 33992.898 ops/s +BaselineBenchmark.countBytes N/A 2mb N/A N/A thrpt 2253.498 ops/s +BaselineBenchmark.jacksonParseAndMask N/A 1kb N/A N/A thrpt 195683.419 ops/s +BaselineBenchmark.jacksonParseAndMask N/A 128kb N/A N/A thrpt 722.508 ops/s +BaselineBenchmark.jacksonParseAndMask N/A 2mb N/A N/A thrpt 27.133 ops/s +BaselineBenchmark.regexReplace N/A 1kb N/A N/A thrpt 11729.500 ops/s +BaselineBenchmark.regexReplace N/A 128kb N/A N/A thrpt 74.242 ops/s +BaselineBenchmark.regexReplace N/A 2mb N/A N/A thrpt 5.408 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 1kb 0.01 none thrpt 2672694.020 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 1kb 0.01 8 thrpt 838428.960 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 1kb 0.1 none thrpt 2025416.416 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 1kb 0.1 8 thrpt 1653016.050 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 128kb 0.01 none thrpt 12053.279 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 128kb 0.01 8 thrpt 5361.050 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 128kb 0.1 none thrpt 9459.132 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 128kb 0.1 8 thrpt 337.606 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 2mb 0.01 none thrpt 487.307 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 2mb 0.01 8 thrpt 12.569 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 2mb 0.1 none thrpt 431.350 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii (no quote) 2mb 0.1 8 thrpt 1.966 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii 1kb 0.01 none thrpt 2819386.235 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii 1kb 0.01 8 thrpt 2502055.170 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii 1kb 0.1 none thrpt 2541085.908 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii 1kb 0.1 8 thrpt 1486612.366 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii 128kb 0.01 none thrpt 12533.878 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii 128kb 0.01 8 thrpt 4054.251 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii 128kb 0.1 none thrpt 8758.617 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii 128kb 0.1 8 thrpt 473.697 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii 2mb 0.01 none thrpt 454.732 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii 2mb 0.01 8 thrpt 13.287 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii 2mb 0.1 none thrpt 14.704 ops/s +JsonMaskerBenchmark.jsonMaskerBytes ascii 2mb 0.1 8 thrpt 1.795 ops/s +JsonMaskerBenchmark.jsonMaskerBytes unicode 1kb 0.01 none thrpt 3579539.318 ops/s +JsonMaskerBenchmark.jsonMaskerBytes unicode 1kb 0.01 8 thrpt 3591563.062 ops/s +JsonMaskerBenchmark.jsonMaskerBytes unicode 1kb 0.1 none thrpt 3436922.834 ops/s +JsonMaskerBenchmark.jsonMaskerBytes unicode 1kb 0.1 8 thrpt 3688532.991 ops/s +JsonMaskerBenchmark.jsonMaskerBytes unicode 128kb 0.01 none thrpt 14381.218 ops/s +JsonMaskerBenchmark.jsonMaskerBytes unicode 128kb 0.01 8 thrpt 2160.292 ops/s +JsonMaskerBenchmark.jsonMaskerBytes unicode 128kb 0.1 none thrpt 9132.054 ops/s +JsonMaskerBenchmark.jsonMaskerBytes unicode 128kb 0.1 8 thrpt 417.713 ops/s +JsonMaskerBenchmark.jsonMaskerBytes unicode 2mb 0.01 none thrpt 17.061 ops/s +JsonMaskerBenchmark.jsonMaskerBytes unicode 2mb 0.01 8 thrpt 19.017 ops/s +JsonMaskerBenchmark.jsonMaskerBytes unicode 2mb 0.1 none thrpt 2.855 ops/s +JsonMaskerBenchmark.jsonMaskerBytes unicode 2mb 0.1 8 thrpt 2.490 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 1kb 0.01 none thrpt 1559147.328 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 1kb 0.01 8 thrpt 1720395.410 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 1kb 0.1 none thrpt 1340913.940 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 1kb 0.1 8 thrpt 726683.288 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 128kb 0.01 none thrpt 9707.708 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 128kb 0.01 8 thrpt 2644.963 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 128kb 0.1 none thrpt 8718.749 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 128kb 0.1 8 thrpt 538.299 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 2mb 0.01 none thrpt 400.015 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 2mb 0.01 8 thrpt 16.719 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 2mb 0.1 none thrpt 347.794 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii (no quote) 2mb 0.1 8 thrpt 1.727 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii 1kb 0.01 none thrpt 1691838.470 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii 1kb 0.01 8 thrpt 1577028.930 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii 1kb 0.1 none thrpt 935689.291 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii 1kb 0.1 8 thrpt 472768.967 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii 128kb 0.01 none thrpt 8974.417 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii 128kb 0.01 8 thrpt 1963.733 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii 128kb 0.1 none thrpt 1533.492 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii 128kb 0.1 8 thrpt 425.324 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii 2mb 0.01 none thrpt 72.243 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii 2mb 0.01 8 thrpt 18.094 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii 2mb 0.1 none thrpt 10.938 ops/s +JsonMaskerBenchmark.jsonMaskerString ascii 2mb 0.1 8 thrpt 1.873 ops/s +JsonMaskerBenchmark.jsonMaskerString unicode 1kb 0.01 none thrpt 592610.914 ops/s +JsonMaskerBenchmark.jsonMaskerString unicode 1kb 0.01 8 thrpt 632087.084 ops/s +JsonMaskerBenchmark.jsonMaskerString unicode 1kb 0.1 none thrpt 507522.810 ops/s +JsonMaskerBenchmark.jsonMaskerString unicode 1kb 0.1 8 thrpt 579923.646 ops/s +JsonMaskerBenchmark.jsonMaskerString unicode 128kb 0.01 none thrpt 1454.359 ops/s +JsonMaskerBenchmark.jsonMaskerString unicode 128kb 0.01 8 thrpt 1426.964 ops/s +JsonMaskerBenchmark.jsonMaskerString unicode 128kb 0.1 none thrpt 546.959 ops/s +JsonMaskerBenchmark.jsonMaskerString unicode 128kb 0.1 8 thrpt 348.036 ops/s +JsonMaskerBenchmark.jsonMaskerString unicode 2mb 0.01 none thrpt 17.853 ops/s +JsonMaskerBenchmark.jsonMaskerString unicode 2mb 0.01 8 thrpt 23.375 ops/s +JsonMaskerBenchmark.jsonMaskerString unicode 2mb 0.1 none thrpt 2.752 ops/s +JsonMaskerBenchmark.jsonMaskerString unicode 2mb 0.1 8 thrpt 2.885 ops/s +``` \ No newline at end of file diff --git a/src/jmh/java/dev/blaauwendraad/masker/json/BaselineBenchmark.java b/src/jmh/java/dev/blaauwendraad/masker/json/BaselineBenchmark.java new file mode 100644 index 00000000..6f64b0fb --- /dev/null +++ b/src/jmh/java/dev/blaauwendraad/masker/json/BaselineBenchmark.java @@ -0,0 +1,88 @@ +package dev.blaauwendraad.masker.json; + +import com.fasterxml.jackson.databind.ObjectMapper; +import dev.blaauwendraad.masker.json.config.JsonMaskingConfig; +import org.openjdk.jmh.annotations.*; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +@Warmup(iterations = 1, time = 3) +@Fork(value = 1) +@Measurement(iterations = 1, time = 3) +@OutputTimeUnit(TimeUnit.SECONDS) +@BenchmarkMode(Mode.Throughput) +public class BaselineBenchmark { + + @org.openjdk.jmh.annotations.State(Scope.Thread) + public static class State { + @Param({"1kb", "128kb", "2mb"}) + String jsonSize; + @Param({"unicode"}) + String characters; + @Param({"0.01"}) + double maskedKeyProbability; + + private Set targetKeys; + private String jsonString; + private byte[] jsonBytes; + private ObjectMapper objectMapper; + private List regexList; + + @Setup + public synchronized void setup() { + targetKeys = BenchmarkUtils.getTargetKeys(20); + jsonString = BenchmarkUtils.randomJson(targetKeys, jsonSize, characters, maskedKeyProbability); + jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8); + + objectMapper = new ObjectMapper(); + + regexList = targetKeys.stream() + // will only match primitive values, not objects or arrays, but it's good to show the difference + .map(key -> Pattern.compile("(\"" + key + "\"\\s*:\\s*)(\"?[^\"]*\"?)", Pattern.CASE_INSENSITIVE)) + .collect(Collectors.toList()); + } + } + + @Benchmark + public int countBytes(State state) { + int sum = 0; + for (int i = 0; i < state.jsonBytes.length; i++) { + sum += state.jsonBytes[i]; + } + return sum; + } + + @Benchmark + public String regexReplace(State state) { + String masked = state.jsonString; + for (Pattern pattern : state.regexList) { + Matcher matcher = pattern.matcher(masked); + if (matcher.find()) { + masked = matcher.replaceAll(matchResult -> { + String beforeValuePart = matchResult.group(1); + String value = matchResult.group(2); + int maskCount = value.startsWith("\"") ? value.length() - 2 : value.length(); + return beforeValuePart + "*".repeat(maskCount); + }); + } + } + return masked; + } + + @Benchmark + public String jacksonParseAndMask(State state) throws IOException { + return ParseAndMaskUtil.mask( + state.jsonString, + state.targetKeys, + JsonMaskingConfig.TargetKeyMode.MASK, + state.objectMapper + ).toString(); + } +} diff --git a/src/jmh/java/dev/blaauwendraad/masker/json/BenchmarkUtils.java b/src/jmh/java/dev/blaauwendraad/masker/json/BenchmarkUtils.java index 31d4195c..63073b8f 100644 --- a/src/jmh/java/dev/blaauwendraad/masker/json/BenchmarkUtils.java +++ b/src/jmh/java/dev/blaauwendraad/masker/json/BenchmarkUtils.java @@ -1,6 +1,15 @@ package dev.blaauwendraad.masker.json; +import randomgen.json.RandomJsonGenerator; +import randomgen.json.RandomJsonGeneratorConfig; + +import java.util.HashSet; +import java.util.Set; import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import static randomgen.json.JsonStringCharacters.*; +import static randomgen.json.JsonStringCharacters.getRandomPrintableUnicodeCharacters; public class BenchmarkUtils { @@ -25,4 +34,36 @@ public static int parseSize(String size) { } return sizeBytes; } + + public static Set getTargetKeys(int count) { + Set targetKeys = new HashSet<>(); + for (int i = 0; i < count; i++) { + targetKeys.add("someSecret" + i); + } + return targetKeys; + } + + public static String randomJson(Set targetKeys, String jsonSize, String characters, double targetKeyPercentage) { + Set allowedCharacters = switch (characters) { + case "ascii (no quote)" -> getPrintableAsciiCharacters() + .stream() + .filter(c -> c != '"') + .collect(Collectors.toSet()); + case "ascii" -> getPrintableAsciiCharacters(); + case "unicode" -> mergeCharSets( + getPrintableAsciiCharacters(), + getUnicodeControlCharacters(), + getRandomPrintableUnicodeCharacters() + ); + default -> throw new IllegalArgumentException("Invalid characters param: " + characters + ", must be one of: 'ascii (no quote)', 'ascii', 'unicode'"); + }; + RandomJsonGeneratorConfig config = RandomJsonGeneratorConfig.builder() + .setAllowedCharacters(allowedCharacters) + .setTargetKeys(targetKeys) + .setTargetKeyPercentage(targetKeyPercentage) + .setTargetJsonSizeBytes(BenchmarkUtils.parseSize(jsonSize)) + .createConfig(); + + return new RandomJsonGenerator(config).createRandomJsonNode().toString(); + } } diff --git a/src/jmh/java/dev/blaauwendraad/masker/json/JsonMaskerBenchmark.java b/src/jmh/java/dev/blaauwendraad/masker/json/JsonMaskerBenchmark.java index b8d95da2..1b8eba32 100644 --- a/src/jmh/java/dev/blaauwendraad/masker/json/JsonMaskerBenchmark.java +++ b/src/jmh/java/dev/blaauwendraad/masker/json/JsonMaskerBenchmark.java @@ -1,6 +1,5 @@ package dev.blaauwendraad.masker.json; -import com.fasterxml.jackson.databind.ObjectMapper; import dev.blaauwendraad.masker.json.config.JsonMaskingConfig; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -12,18 +11,11 @@ import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.Warmup; -import randomgen.json.RandomJsonGenerator; -import randomgen.json.RandomJsonGeneratorConfig; -import java.io.IOException; import java.nio.charset.StandardCharsets; -import java.util.HashSet; import java.util.Objects; import java.util.Set; import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; - -import static randomgen.json.JsonStringCharacters.getPrintableAsciiCharacters; @Warmup(iterations = 1, time = 3) @Fork(value = 1) @@ -34,71 +26,34 @@ public class JsonMaskerBenchmark { @org.openjdk.jmh.annotations.State(Scope.Thread) public static class State { - @Param({ "1kb", "128kb", "2mb" }) + @Param({"1kb", "128kb", "2mb"}) String jsonSize; - @Param({ "0.01", "0.1" }) + @Param({"ascii (no quote)", "ascii", "unicode"}) + String characters; + @Param({"0.01", "0.1"}) double maskedKeyProbability; - @Param({ "none", "8" }) + @Param({"none", "8"}) String obfuscationLength; + private String jsonString; private byte[] jsonBytes; private JsonMasker jsonMasker; - private ObjectMapper objectMapper; @Setup public synchronized void setup() { - Set keysToBeMasked = getTargetKeys(); - - RandomJsonGeneratorConfig config = RandomJsonGeneratorConfig.builder() - .setAllowedCharacters( - getPrintableAsciiCharacters().stream() - .filter(c -> c != '"') - .collect(Collectors.toSet()) - ) - .setTargetKeys(keysToBeMasked) - .setTargetKeyPercentage(maskedKeyProbability) - .setTargetJsonSizeBytes(BenchmarkUtils.parseSize(jsonSize)) - .createConfig(); + Set targetKeys = BenchmarkUtils.getTargetKeys(20); - jsonString = new RandomJsonGenerator(config).createRandomJsonNode().toString(); + jsonString = BenchmarkUtils.randomJson(targetKeys, jsonSize, characters, maskedKeyProbability); jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8); jsonMasker = JsonMasker.getMasker( - JsonMaskingConfig.custom(keysToBeMasked, JsonMaskingConfig.TargetKeyMode.MASK) + JsonMaskingConfig.custom(targetKeys, JsonMaskingConfig.TargetKeyMode.MASK) .obfuscationLength(Objects.equals(obfuscationLength, "none") - ? -1 - : Integer.parseInt(obfuscationLength)) + ? -1 + : Integer.parseInt(obfuscationLength)) .build() ); - objectMapper = new ObjectMapper(); - } - - private Set getTargetKeys() { - Set targetKeys = new HashSet<>(); - for (int i = 0; i < 20; i++) { - targetKeys.add("someSecret" + i); - } - return targetKeys; - } - } - - @Benchmark - public int baselineCountBytes(State state) { - int sum = 0; - for (int i = 0; i < state.jsonBytes.length; i++) { - sum += state.jsonBytes[i]; } - return sum; - } - - @Benchmark - public String jacksonString(State state) throws IOException { - return ParseAndMaskUtil.mask( - state.jsonString, - state.getTargetKeys(), - JsonMaskingConfig.TargetKeyMode.MASK, - state.objectMapper - ).toString(); } @Benchmark