Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SimdJsonParser2 based on bitindex #60

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ group = 'org.simdjson'
version = scmVersion.version

repositories {
// Repositories used to resolve the project's dependencies.
// NOTE(review): mavenLocal() is declared ahead of mavenCentral(); presumably this lets locally
// installed artifacts be picked up first, which makes the build depend on the state of the
// developer's machine — confirm it is really needed before merging.
mavenLocal()
mavenCentral()
}

Expand All @@ -45,6 +46,7 @@ java {
// Version numbers shared by several dependency declarations in this build script.
ext {
// Used by the junit-jupiter api/params/engine test dependencies.
junitVersion = '5.10.2'
// Used by the jsoniter-scala core (jmh) and macros (compileOnly) dependencies.
jsoniterScalaVersion = '2.28.4'
// Used by the Lombok compileOnly/annotationProcessor entries for main and test sources.
lombokVersion = '1.18.34'
}

dependencies {
Expand All @@ -53,13 +55,18 @@ dependencies {
jmhImplementation group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-core_2.13', version: jsoniterScalaVersion
jmhImplementation group: 'com.google.guava', name: 'guava', version: '32.1.2-jre'
compileOnly group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-macros_2.13', version: jsoniterScalaVersion
compileOnly group: 'org.projectlombok', name: 'lombok', version: lombokVersion
annotationProcessor group: 'org.projectlombok', name: 'lombok', version: lombokVersion
testCompileOnly group: 'org.projectlombok', name: 'lombok', version: lombokVersion
testAnnotationProcessor group: 'org.projectlombok', name: 'lombok', version: lombokVersion

testImplementation group: 'org.assertj', name: 'assertj-core', version: '3.24.2'
testImplementation group: 'org.apache.commons', name: 'commons-text', version: '1.10.0'
testImplementation group: 'org.junit-pioneer', name: 'junit-pioneer', version: '2.2.0'
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-api', version: junitVersion
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-params', version: junitVersion
testRuntimeOnly group: 'org.junit.jupiter', name: 'junit-jupiter-engine', version: junitVersion

}

tasks.register('downloadTestData') {
Expand Down Expand Up @@ -160,7 +167,9 @@ publishing {
publications {
mavenJava(MavenPublication) {
from(components.java)

groupId = 'org.simdjson'
artifactId = 'simdjson-java'
version = scmVersion.version
pom {
name = project.name
description = 'A Java version of simdjson, a high-performance JSON parser utilizing SIMD instructions.'
Expand Down
49 changes: 49 additions & 0 deletions src/jmh/java/org/simdjson/ParseAndSelectFixPathBenchMark.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package org.simdjson;

import java.io.IOException;
import java.io.InputStream;
import java.util.concurrent.TimeUnit;

import org.openjdk.jmh.annotations.*;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

/**
 * JMH throughput benchmark comparing three ways of processing the same JSON document:
 * a full parse with {@link SimdJsonParser}, extraction of a fixed set of paths with
 * {@link SimdJsonParserWithFixPath}, and a full tree parse with Jackson.
 *
 * <p>Note that the {@code SimdJson} and {@code Jackson} variants only parse the document;
 * they do not look up the individual paths afterwards.
 */
@State(Scope.Benchmark)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.SECONDS)
public class ParseAndSelectFixPathBenchMark {

    /** Classpath location of the JSON document to benchmark against. */
    @Param({"/twitter.json"})
    String fileName;

    /** Raw bytes of the benchmark document, loaded once per trial. */
    private byte[] buffer;

    private final SimdJsonParser simdJsonParser = new SimdJsonParser();
    private final ObjectMapper jacksonObjectMapper = new ObjectMapper();
    private final SimdJsonParserWithFixPath simdJsonParserWithFixPath = new SimdJsonParserWithFixPath(
            "statuses.0.user.default_profile", "statuses.0.user.screen_name",
            "statuses.0.user.name", "statuses.0.user.id", "statuses.0.user.description",
            "statuses.1.user.default_profile", "statuses.1.user.screen_name",
            "statuses.1.user.name", "statuses.1.user.id", "statuses.1.user.description");

    /**
     * Loads the document named by {@link #fileName} from the classpath.
     *
     * @throws IOException if the resource does not exist or cannot be read
     */
    @Setup(Level.Trial)
    public void setup() throws IOException {
        // Honour the @Param value instead of a hard-coded path so that additional
        // files can be benchmarked just by extending the @Param list.
        try (InputStream is = ParseAndSelectFixPathBenchMark.class.getResourceAsStream(fileName)) {
            if (is == null) {
                throw new IOException("Benchmark resource not found on classpath: " + fileName);
            }
            buffer = is.readAllBytes();
        }
        System.out.println("VectorSpecies = " + VectorUtils.BYTE_SPECIES);
    }

    /** Full parse of the document with the regular simdjson parser. */
    @Benchmark
    public JsonValue parseMultiValuesForFixPaths_SimdJson() {
        return simdJsonParser.parse(buffer, buffer.length);
    }

    /** Extraction of the configured paths with the fixed-path parser. */
    @Benchmark
    public String[] parseMultiValuesForFixPaths_SimdJsonParserWithFixPath() {
        return simdJsonParserWithFixPath.parse(buffer, buffer.length);
    }

    /** Full tree parse of the document with Jackson. */
    @Benchmark
    public JsonNode parseMultiValuesForFixPaths_Jackson() throws IOException {
        return jacksonObjectMapper.readTree(buffer);
    }
}
8 changes: 5 additions & 3 deletions src/main/java/org/simdjson/BitIndexes.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.simdjson;

class BitIndexes {
import java.util.Arrays;

public class BitIndexes {

private final int[] indexes;

Expand Down Expand Up @@ -44,8 +46,8 @@ private long clearLowestBit(long bits) {
return bits & (bits - 1);
}

void advance() {
readIdx++;
/**
 * Moves the read position one slot forward and returns the index that was stored
 * at the position it pointed to before the move.
 */
int advance() {
    // Increment first, then read, so the cursor has moved even if the array access fails.
    final int slot = readIdx++;
    return indexes[slot];
}

int getAndAdvance() {
Expand Down
7 changes: 6 additions & 1 deletion src/main/java/org/simdjson/SimdJsonParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ public SimdJsonParser(int capacity, int maxDepth) {
paddedBuffer = new byte[capacity];
indexer = new StructuralIndexer(bitIndexes);
}

/**
 * Pads the input if needed, resets the parser, runs {@code stage1} over the first
 * {@code len} bytes and hands back the parser's {@code bitIndexes} field.
 *
 * <p>The returned object is the parser's own instance, not a copy.
 *
 * @param buffer bytes of the JSON document
 * @param len    number of bytes of {@code buffer} to process
 * @return this parser's {@code bitIndexes} instance
 */
BitIndexes buildBitIndex(byte[] buffer, int len) {
    final byte[] paddedInput = padIfNeeded(buffer, len);
    reset();
    stage1(paddedInput, len);
    return bitIndexes;
}
public <T> T parse(byte[] buffer, int len, Class<T> expectedType) {
byte[] padded = padIfNeeded(buffer, len);
reset();
Expand Down
219 changes: 219 additions & 0 deletions src/main/java/org/simdjson/SimdJsonParserWithFixPath.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
package org.simdjson;

import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

import lombok.Data;
import lombok.RequiredArgsConstructor;

/**
 * Extracts the values found at a fixed set of paths from a JSON document, using only the
 * structural index produced by {@link SimdJsonParser#buildBitIndex(byte[], int)}.
 *
 * <p>Paths are dot-separated; object members are addressed by key and array elements by
 * their zero-based position (for example {@code "statuses.0.user.name"}). Because the dot
 * is the separator, keys that themselves contain a dot cannot be addressed.
 *
 * <p>Values are returned as text: strings without their surrounding quotes (escape sequences
 * are NOT decoded), numbers and booleans as written, objects and arrays as their raw JSON
 * text, and JSON {@code null} or a missing path as Java {@code null}.
 *
 * <p>Instances keep per-parse state in fields and reuse the returned array between calls.
 */
public class SimdJsonParserWithFixPath {

    private static final String INVALID_JSON = "invalid json format";

    /**
     * Node of the path trie built from the requested paths. Accessors and the one-argument
     * constructor are generated by Lombok.
     *
     * <p>{@code equals}, {@code hashCode} and {@code toString} are written by hand on purpose:
     * the versions {@code @Data} would generate include both {@code parent} and
     * {@code children}, which reference each other, and would therefore recurse forever.
     */
    @Data
    @RequiredArgsConstructor
    static class JsonNode {
        // value of the parser's currentVersion at the moment `value` was last written
        private long version = 0;
        // true when one of the requested paths ends at this node
        private boolean isLeaf = false;
        // path segment (object key or array position) this node stands for; null for the root
        private final String name;
        private String value = null;
        private JsonNode parent = null;
        private Map<String, JsonNode> children = new HashMap<>();
        // buffer offsets of the opening and closing bracket of the last container seen here
        private int start = -1;
        private int end = -1;

        @Override
        public boolean equals(Object other) {
            return this == other;
        }

        @Override
        public int hashCode() {
            return System.identityHashCode(this);
        }

        @Override
        public String toString() {
            return "JsonNode(name=" + name + ", isLeaf=" + isLeaf
                    + ", version=" + version + ", value=" + value + ")";
        }
    }

    private final SimdJsonParser parser;
    private BitIndexes bitIndexes;
    private final JsonNode root = new JsonNode(null);
    // row[i] is the trie node at which the i-th requested path ends
    private final JsonNode[] row;
    private final String[] result;
    private final String[] emptyResult;
    // trie node matching the container that is currently being walked
    private JsonNode ptr;
    private byte[] buffer;
    private final int expectParseCols;
    // every time json string is processed, currentVersion will be incremented by 1
    private long currentVersion = 0;

    /**
     * Creates a parser that extracts the given paths.
     *
     * @param args dot-separated paths; the i-th path determines the i-th element of the
     *             array returned by {@link #parse(byte[], int)}
     */
    public SimdJsonParserWithFixPath(String... args) {
        parser = new SimdJsonParser();
        expectParseCols = args.length;
        row = new JsonNode[expectParseCols];
        result = new String[expectParseCols];
        // elements of a freshly allocated array are already null
        emptyResult = new String[expectParseCols];
        for (int i = 0; i < expectParseCols; i++) {
            JsonNode cur = root;
            for (String segment : args[i].split("\\.")) {
                final JsonNode parent = cur;
                cur = parent.getChildren().computeIfAbsent(segment, key -> {
                    JsonNode child = new JsonNode(key);
                    child.setParent(parent);
                    return child;
                });
            }
            cur.setLeaf(true);
            row[i] = cur;
        }
    }

    /**
     * Extracts the configured paths from the given document.
     *
     * @param buffer bytes of the JSON document
     * @param len    number of bytes of {@code buffer} that belong to the document
     * @return one entry per configured path, in constructor order; {@code null} entries mean
     *         "JSON null" or "path not present". The array is owned by this parser and is
     *         overwritten by the next call. For a {@code null} or empty input an all-null
     *         array is returned.
     * @throws RuntimeException if the document does not start with an object or an array, or
     *                          ends before an open container is closed
     */
    public String[] parse(byte[] buffer, int len) {
        // Reject empty input before touching the underlying parser.
        if (buffer == null || buffer.length == 0 || len == 0) {
            return emptyResult;
        }
        this.bitIndexes = parser.buildBitIndex(buffer, len);
        if (!bitIndexes.hasNext()) {
            throw new RuntimeException(INVALID_JSON);
        }
        this.currentVersion++;
        this.ptr = root;
        this.buffer = buffer;

        switch (buffer[bitIndexes.peek()]) {
            case '{' -> parseMap();
            case '[' -> parseList();
            default -> throw new RuntimeException(INVALID_JSON);
        }
        return getResult();
    }

    /**
     * Consumes one scalar token and returns its text: the bytes between the current index
     * and the next one, trimmed, with surrounding quotes removed. Returns {@code null} for
     * the literal {@code null} (compared case-insensitively).
     */
    private String parseValue() {
        int start = bitIndexes.advance();
        int next = bitIndexes.peek();
        // JSON text is UTF-8; do not depend on the platform default charset.
        String field = new String(buffer, start, next - start, StandardCharsets.UTF_8).trim();
        if ("null".equalsIgnoreCase(field)) {
            return null;
        }
        // field type is string or type is decimal
        if (field.length() >= 2 && field.startsWith("\"")) {
            field = field.substring(1, field.length() - 1);
        }
        return field;
    }

    /**
     * Handles one member of the container that {@code ptr} stands for.
     *
     * @param expectFieldName the array position as text when the container is a list, or
     *                        {@code null} when it is a map and the key still has to be read
     */
    private void parseElement(String expectFieldName) {
        // if expectFieldName is null, parent is map, else is list
        if (expectFieldName == null) {
            expectFieldName = parseValue();
            bitIndexes.advance(); // skip :
        }
        JsonNode child = ptr.getChildren().get(expectFieldName);
        if (child == null) {
            // no requested path goes through this member
            skip(false);
            return;
        }
        ptr = child;
        switch (buffer[bitIndexes.peek()]) {
            case '{' -> parseMap();
            case '[' -> parseList();
            default -> {
                ptr.setValue(skip(true));
                ptr.setVersion(currentVersion);
            }
        }
        ptr = ptr.getParent();
    }

    /** Walks the object whose opening brace is the current index. */
    private void parseMap() {
        parseContainer((byte) '}', false);
    }

    /** Walks the array whose opening bracket is the current index. */
    private void parseList() {
        parseContainer((byte) ']', true);
    }

    /**
     * Walks the container that starts at the current index and leaves the index just past
     * its closing bracket.
     *
     * @param closing byte that terminates the container
     * @param indexed {@code true} for arrays (members are addressed by position),
     *                {@code false} for objects (members are addressed by key)
     */
    private void parseContainer(byte closing, boolean indexed) {
        final JsonNode node = ptr;
        Map<String, JsonNode> children = node.getChildren();
        if (children == null || children.isEmpty()) {
            // Nothing below this node was requested: jump over the container in one go and
            // keep its raw text only if the node itself is a requested path.
            node.setValue(skip(node.isLeaf()));
            node.setVersion(currentVersion);
            return;
        }
        node.setStart(bitIndexes.peek());
        bitIndexes.advance();
        int position = 0;
        while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != closing) {
            parseElement(indexed ? Integer.toString(position) : null);
            if (bitIndexes.hasNext() && buffer[bitIndexes.peek()] == ',') {
                bitIndexes.advance();
            }
            position++;
        }
        if (!bitIndexes.hasNext()) {
            // ran out of structural indexes before the container was closed
            throw new RuntimeException(INVALID_JSON);
        }
        node.setEnd(bitIndexes.peek());
        if (node.isLeaf()) {
            node.setValue(new String(buffer, node.getStart(),
                    node.getEnd() - node.getStart() + 1, StandardCharsets.UTF_8));
            node.setVersion(currentVersion);
        }
        bitIndexes.advance();
    }

    /**
     * Consumes the value that starts at the current index.
     *
     * @param retainValue whether the text of the value is needed
     * @return the value's text when {@code retainValue} is {@code true} (raw JSON for
     *         containers, see {@link #parseValue()} for scalars), otherwise {@code null}
     */
    private String skip(boolean retainValue) {
        switch (buffer[bitIndexes.peek()]) {
            case '{' -> {
                return skipContainer((byte) '{', (byte) '}', retainValue);
            }
            case '[' -> {
                return skipContainer((byte) '[', (byte) ']', retainValue);
            }
            default -> {
                if (!retainValue) {
                    // a scalar occupies exactly one index; no need to materialise its text
                    bitIndexes.advance();
                    return null;
                }
                return parseValue();
            }
        }
    }

    /**
     * Moves past the container that opens at the current index by tracking the nesting depth
     * of brackets of the same kind.
     */
    private String skipContainer(byte opening, byte closing, boolean retainValue) {
        final int start = bitIndexes.peek();
        int depth = 1;
        while (depth > 0) {
            bitIndexes.advance();
            if (!bitIndexes.hasNext()) {
                throw new RuntimeException(INVALID_JSON);
            }
            byte current = buffer[bitIndexes.peek()];
            if (current == opening) {
                depth++;
            } else if (current == closing) {
                depth--;
            }
        }
        final int end = bitIndexes.peek();
        bitIndexes.advance();
        return retainValue
                ? new String(buffer, start, end - start + 1, StandardCharsets.UTF_8)
                : null;
    }

    /**
     * Copies the values captured during the current parse into the shared result array.
     * Nodes whose version is older than the current one were not seen in this document.
     */
    private String[] getResult() {
        for (int i = 0; i < expectParseCols; i++) {
            JsonNode node = row[i];
            result[i] = node.getVersion() < currentVersion ? null : node.getValue();
        }
        return result;
    }
}
33 changes: 33 additions & 0 deletions src/test/java/org/simdjson/JsonMultiValueParsingTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package org.simdjson;

import static org.simdjson.testutils.SimdJsonAssertions.assertThat;
import static org.simdjson.testutils.TestUtils.toUtf8;

import org.junit.jupiter.api.Test;

/**
 * Tests for {@link SimdJsonParserWithFixPath}: each test feeds one document to a parser
 * configured with a few paths and checks the extracted values position by position.
 */
public class JsonMultiValueParsingTest {

    /** Nested object members, a whole array, an array element and a JSON null. */
    @Test
    public void testParseMultiValue() {
        // given
        SimdJsonParserWithFixPath fixPathParser =
                new SimdJsonParserWithFixPath("field1.field2", "field1.field3", "field4", "field4.0", "field5");
        byte[] document = toUtf8("{\"field1\":{\"field2\":\"value2\",\"field3\":3},\"field4\":[\"value4\",\"value5\"],\"field5\":null}");

        // when
        String[] extracted = fixPathParser.parse(document, document.length);

        // then
        assertThat(extracted[0]).isEqualTo("value2");
        assertThat(extracted[1]).isEqualTo("3");
        assertThat(extracted[2]).isEqualTo("[\"value4\",\"value5\"]");
        assertThat(extracted[3]).isEqualTo("value4");
        assertThat(extracted[4]).isNull();
    }

    /** Keys made of non-ASCII characters and of raw (undecoded) unicode escapes. */
    @Test
    public void testNonAsciiCharacters() {
        // given
        SimdJsonParserWithFixPath fixPathParser =
                new SimdJsonParserWithFixPath("ąćśńźż", "\\u20A9\\u0E3F", "αβγ", "😀abc😀");
        byte[] document = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4}");

        // when
        String[] extracted = fixPathParser.parse(document, document.length);

        // then
        assertThat(extracted[0]).isEqualTo("1");
        assertThat(extracted[1]).isEqualTo("2");
        assertThat(extracted[2]).isEqualTo("3");
        assertThat(extracted[3]).isEqualTo("4");
    }
}