Skip to content

Commit

Permalink
Handle non-ASCII characters in field names (#24)
Browse files Browse the repository at this point in the history
  • Loading branch information
piotrrzysko authored Sep 12, 2023
1 parent dd6d5b5 commit 6b398c3
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 121 deletions.
82 changes: 27 additions & 55 deletions src/main/java/org/simdjson/JsonValue.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.simdjson;

import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;

Expand Down Expand Up @@ -60,7 +61,7 @@ public Iterator<JsonValue> arrayIterator() {
return new ArrayIterator(tapeIdx);
}

public Iterator<Map.Entry<CharSequence, JsonValue>> objectIterator() {
public Iterator<Map.Entry<String, JsonValue>> objectIterator() {
return new ObjectIterator(tapeIdx);
}

Expand All @@ -76,32 +77,34 @@ public boolean asBoolean() {
return tape.getType(tapeIdx) == TRUE_VALUE;
}

public CharSequence asCharSequence() {
return asCharSequence(tapeIdx);
public String asString() {
return getString(tapeIdx);
}

private CharSequence asCharSequence(int idx) {
int stringBufferIdx = (int) tape.getValue(idx);
private String getString(int tapeIdx) {
int stringBufferIdx = (int) tape.getValue(tapeIdx);
int len = IntegerUtils.toInt(stringBuffer, stringBufferIdx);
return new StringView(stringBufferIdx + Integer.BYTES, len);
return new String(stringBuffer, stringBufferIdx + Integer.BYTES, len, UTF_8);
}

public JsonValue get(String name) {
Iterator<Map.Entry<CharSequence, JsonValue>> it = objectIterator();
while (it.hasNext()) {
Map.Entry<CharSequence, JsonValue> entry = it.next();
CharSequence key = entry.getKey();
if (CharSequence.compare(key, name) == 0) {
return entry.getValue();
byte[] bytes = name.getBytes(UTF_8);
int idx = tapeIdx + 1;
int endIdx = tape.getMatchingBraceIndex(tapeIdx) - 1;
while (idx < endIdx) {
int stringBufferIdx = (int) tape.getValue(idx);
int len = IntegerUtils.toInt(stringBuffer, stringBufferIdx);
int valIdx = tape.computeNextIndex(idx);
idx = tape.computeNextIndex(valIdx);
int stringBufferFromIdx = stringBufferIdx + Integer.BYTES;
int stringBufferToIdx = stringBufferFromIdx + len;
if (Arrays.compare(bytes, 0, bytes.length, stringBuffer, stringBufferFromIdx, stringBufferToIdx) == 0) {
return new JsonValue(tape, valIdx, stringBuffer, buffer);
}
}
return null;
}

public String asString() {
return asCharSequence().toString();
}

public int getSize() {
return tape.getScopeCount(tapeIdx);
}
Expand All @@ -119,7 +122,7 @@ public String toString() {
return String.valueOf(asBoolean());
}
case STRING -> {
return asCharSequence().toString();
return asString();
}
case NULL_VALUE -> {
return "null";
Expand Down Expand Up @@ -160,7 +163,7 @@ public JsonValue next() {
}
}

private class ObjectIterator implements Iterator<Map.Entry<CharSequence, JsonValue>> {
private class ObjectIterator implements Iterator<Map.Entry<String, JsonValue>> {

private final int endIdx;

Expand All @@ -177,27 +180,27 @@ public boolean hasNext() {
}

@Override
public Map.Entry<CharSequence, JsonValue> next() {
CharSequence key = asCharSequence(idx);
public Map.Entry<String, JsonValue> next() {
String key = getString(idx);
idx = tape.computeNextIndex(idx);
JsonValue value = new JsonValue(tape, idx, stringBuffer, buffer);
idx = tape.computeNextIndex(idx);
return new ObjectField(key, value);
}
}

private static class ObjectField implements Map.Entry<CharSequence, JsonValue> {
private static class ObjectField implements Map.Entry<String, JsonValue> {

private final CharSequence key;
private final String key;
private final JsonValue value;

ObjectField(CharSequence key, JsonValue value) {
ObjectField(String key, JsonValue value) {
this.key = key;
this.value = value;
}

@Override
public CharSequence getKey() {
public String getKey() {
return key;
}

Expand All @@ -211,35 +214,4 @@ public JsonValue setValue(JsonValue value) {
throw new UnsupportedOperationException("Object fields are immutable");
}
}

/**
 * Read-only {@link CharSequence} view over a slice of the enclosing value's
 * {@code stringBuffer}.
 *
 * <p>The view works on raw bytes: {@link #length()} is the number of bytes in the slice and
 * {@link #charAt(int)} returns a single byte widened to a {@code char}. Only
 * {@link #toString()} decodes the slice as UTF-8, so for non-ASCII content the chars exposed
 * here are not the characters of the decoded string.
 */
private class StringView implements CharSequence {

    private final int startIdx;
    private final int len;

    /**
     * @param startIdx offset of the first byte of the slice within {@code stringBuffer}
     * @param len number of bytes in the slice
     */
    StringView(int startIdx, int len) {
        this.startIdx = startIdx;
        this.len = len;
    }

    /** Returns the number of bytes covered by this view. */
    @Override
    public int length() {
        return len;
    }

    /**
     * Returns the byte at {@code index} (relative to the start of the view) as an unsigned
     * value in the range 0..255.
     */
    @Override
    public char charAt(int index) {
        // Mask before widening: a plain (char) cast sign-extends bytes >= 0x80 to 0xFF80..0xFFFF.
        return (char) (stringBuffer[startIdx + index] & 0xFF);
    }

    /**
     * Returns a view over the bytes in {@code [start, end)} of this view.
     *
     * @throws IndexOutOfBoundsException if {@code start} is negative, {@code end} is greater
     *         than {@link #length()}, or {@code start} is greater than {@code end}
     */
    @Override
    public CharSequence subSequence(int start, int end) {
        if (start < 0 || end > len || start > end) {
            throw new IndexOutOfBoundsException("start: " + start + ", end: " + end + ", length: " + len);
        }
        // The second constructor argument is a length, not an end offset.
        return new StringView(startIdx + start, end - start);
    }

    /** Decodes the bytes covered by this view as UTF-8. */
    @Override
    public String toString() {
        return new String(stringBuffer, startIdx, len, UTF_8);
    }
}
}
8 changes: 0 additions & 8 deletions src/test/java/org/simdjson/JsonValueAssert.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import org.assertj.core.api.AbstractAssert;
import org.assertj.core.api.Assertions;

import static java.nio.charset.StandardCharsets.UTF_8;

class JsonValueAssert extends AbstractAssert<JsonValueAssert, JsonValue> {

JsonValueAssert(JsonValue actual) {
Expand Down Expand Up @@ -36,12 +34,6 @@ JsonValueAssert isEqualTo(String expected) {
.withFailMessage("Expecting value to be string but was " + getActualType())
.isTrue();
Assertions.assertThat(actual.asString()).isEqualTo(expected);
CharSequence cs = actual.asCharSequence();
byte[] bytesExpected = expected.getBytes(UTF_8);
Assertions.assertThat(cs.length()).isEqualTo(bytesExpected.length);
for (int i = 0; i < cs.length(); i++) {
Assertions.assertThat((byte) cs.charAt(i)).isEqualTo(bytesExpected[i]);
}
return this;
}

Expand Down
97 changes: 97 additions & 0 deletions src/test/java/org/simdjson/ObjectParsingTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package org.simdjson;

import org.junit.jupiter.api.Test;

import java.util.Iterator;
import java.util.Map;

import static org.assertj.core.api.Assertions.assertThat;
import static org.simdjson.JsonValueAssert.assertThat;
import static org.simdjson.StringUtils.toUtf8;

/**
 * Tests for parsing JSON objects: iteration over fields, size, and lookup of fields by name,
 * including names that contain non-ASCII characters.
 */
public class ObjectParsingTest {

    /** An empty object is recognized as an object and exposes no fields. */
    @Test
    public void emptyObject() {
        // given
        SimdJsonParser parser = new SimdJsonParser();
        byte[] json = toUtf8("{}");

        // when
        JsonValue jsonValue = parser.parse(json, json.length);

        // then
        assertThat(jsonValue.isObject()).isTrue();
        // Use the object iterator (not the array iterator) so the object code path is exercised.
        Iterator<Map.Entry<String, JsonValue>> it = jsonValue.objectIterator();
        assertThat(it.hasNext()).isFalse();
    }

    /** Fields are returned by the iterator in document order with matching keys and values. */
    @Test
    public void objectIterator() {
        // given
        SimdJsonParser parser = new SimdJsonParser();
        byte[] json = toUtf8("{\"a\": 1, \"b\": 2, \"c\": 3}");

        // when
        JsonValue jsonValue = parser.parse(json, json.length);

        // then
        assertThat(jsonValue.isObject()).isTrue();
        String[] expectedKeys = new String[]{"a", "b", "c"};
        int[] expectedValue = new int[]{1, 2, 3};
        int counter = 0;
        Iterator<Map.Entry<String, JsonValue>> it = jsonValue.objectIterator();
        while (it.hasNext()) {
            Map.Entry<String, JsonValue> field = it.next();
            assertThat(field.getKey()).isEqualTo(expectedKeys[counter]);
            assertThat(field.getValue()).isEqualTo(expectedValue[counter]);
            counter++;
        }
        // Guards against the iterator stopping early.
        assertThat(counter).isEqualTo(expectedKeys.length);
    }

    /** The size of an object is the number of its fields. */
    @Test
    public void objectSize() {
        // given
        SimdJsonParser parser = new SimdJsonParser();
        byte[] json = toUtf8("{\"1\": 1, \"2\": 1, \"3\": 1}");

        // when
        JsonValue jsonValue = parser.parse(json, json.length);

        // then
        assertThat(jsonValue.isObject()).isTrue();
        assertThat(jsonValue.getSize()).isEqualTo(3);
    }

    /**
     * Fields whose names contain non-ASCII characters can be looked up by name. The second
     * field is written with JSON unicode escapes in the document and is looked up by the
     * characters those escapes denote.
     */
    @Test
    public void fieldNamesWithNonAsciiCharacters() {
        // given
        SimdJsonParser parser = new SimdJsonParser();
        byte[] json = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4}");

        // when
        JsonValue jsonValue = parser.parse(json, json.length);

        // then
        assertThat(jsonValue.get("ąćśńźż")).isEqualTo(1);
        assertThat(jsonValue.get("\u20A9\u0E3F")).isEqualTo(2);
        assertThat(jsonValue.get("αβγ")).isEqualTo(3);
        assertThat(jsonValue.get("😀abc😀")).isEqualTo(4);
    }

    /**
     * Looking up a name that is not present yields {@code null}. The probes are near misses:
     * an ASCII look-alike, the literal text of the escape sequences rather than the characters
     * they denote, and a proper prefix of an existing name.
     */
    @Test
    public void nonexistentField() {
        // given
        SimdJsonParser parser = new SimdJsonParser();
        byte[] json = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3}");

        // when
        JsonValue jsonValue = parser.parse(json, json.length);

        // then
        assertThat(jsonValue.get("acsnz")).isNull();
        assertThat(jsonValue.get("\\u20A9\\u0E3F")).isNull();
        assertThat(jsonValue.get("αβ")).isNull();
    }
}
58 changes: 0 additions & 58 deletions src/test/java/org/simdjson/SimdJsonParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import org.junit.jupiter.params.provider.ValueSource;

import java.util.Iterator;
import java.util.Map;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.fail;
Expand Down Expand Up @@ -33,24 +32,6 @@ public void testEmptyArray() {
}
}

@Test
public void testEmptyObject() {
    // given
    SimdJsonParser parser = new SimdJsonParser();
    byte[] json = toUtf8("{}");

    // when
    JsonValue jsonValue = parser.parse(json, json.length);

    // then
    assertThat(jsonValue.isObject()).isTrue();
    // Any element at all is a failure; fail() throws, so the loop never advances.
    for (Iterator<JsonValue> elements = jsonValue.arrayIterator(); elements.hasNext(); elements.next()) {
        fail("Unexpected field");
    }
}

@Test
public void testArrayIterator() {
// given
Expand All @@ -74,31 +55,6 @@ public void testArrayIterator() {
assertThat(counter).isEqualTo(expectedValues.length);
}

@Test
public void testObjectIterator() {
    // given
    SimdJsonParser parser = new SimdJsonParser();
    byte[] json = toUtf8("{\"a\": 1, \"b\": 2, \"c\": 3}");

    // when
    JsonValue jsonValue = parser.parse(json, json.length);

    // then
    assertThat(jsonValue.isObject()).isTrue();
    String[] names = {"a", "b", "c"};
    int[] numbers = {1, 2, 3};
    int seen = 0;
    // Walk the fields in document order, checking each key/value pair against its expectation.
    for (Iterator<Map.Entry<CharSequence, JsonValue>> fields = jsonValue.objectIterator(); fields.hasNext(); seen++) {
        Map.Entry<CharSequence, JsonValue> current = fields.next();
        assertThat(current.getKey()).usingComparator(CharSequence::compare).isEqualTo(names[seen]);
        assertThat(current.getValue()).isEqualTo(numbers[seen]);
    }
    // Every expected field must have been visited.
    assertThat(seen).isEqualTo(names.length);
}

@Test
public void testBooleanValues() {
// given
Expand Down Expand Up @@ -313,20 +269,6 @@ public void testArraySize() {
assertThat(jsonValue.getSize()).isEqualTo(3);
}

@Test
public void testObjectSize() {
    // given: an object with three fields and no whitespace between tokens
    byte[] json = toUtf8("{\"1\":1,\"2\":1,\"3\":1}");
    SimdJsonParser parser = new SimdJsonParser();

    // when
    JsonValue parsed = parser.parse(json, json.length);

    // then
    assertThat(parsed.isObject()).isTrue();
    assertThat(parsed.getSize()).isEqualTo(3);
}

@Test
public void testLargeArraySize() {
// given
Expand Down

0 comments on commit 6b398c3

Please sign in to comment.