Skip to content

Commit

Permalink
finish simd string serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
JairusSW committed Dec 27, 2024
1 parent 54a67df commit bd96a29
Show file tree
Hide file tree
Showing 24 changed files with 381 additions and 256 deletions.
2 changes: 1 addition & 1 deletion as-test.config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"input": ["./assembly/__tests__/bool.spec.ts"],
"input": ["./assembly/__tests__/simd/string.spec.ts"],
"outDir": "./build",
"config": "none",
"plugins": {
Expand Down
23 changes: 13 additions & 10 deletions assembly/__tests__/simd/string.spec.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { describe, expect, log, run } from "as-test/assembly";
import { describe, expect, run } from "as-test/assembly";
import { serializeString_SIMD } from "../../serialize/simd/string";
import { deserializeString_SIMD } from "../../deserialize/simd/string";

Expand All @@ -17,18 +17,21 @@ describe("Should serialize strings", () => {

expect(serialize_simd('string with colon : comma , brace [ ] bracket { } and quote " and other quote "'))
.toBe('"string with colon : comma , brace [ ] bracket { } and quote \\" and other quote \\""');

// Every C0 control character (U+0000..U+001F) must be escaped: the ones with a
// short JSON escape use it (\b \t \n \f \r), all others use the \uXXXX form.
// The sequence has to be contiguous so that U+0010 is exercised as well.
expect(serialize_simd("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000a\u000b\u000c\u000d\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f"))
.toBe("\"\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\b\\t\\n\\u000b\\f\\r\\u000e\\u000f\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017\\u0018\\u0019\\u001a\\u001b\\u001c\\u001d\\u001e\\u001f\"")
});

describe("Should deserialize strings", () => {
expect(deserialize_simd('"abcdefg"')).toBe("abcdefg");
expect(deserialize_simd('"st\\"ring\\" w\\"\\"ith quotes\\""'))
.toBe('st"ring" w""ith quotes"');
// describe("Should deserialize strings", () => {
// expect(deserialize_simd('"abcdefg"')).toBe("abcdefg");
// expect(deserialize_simd('"st\\"ring\\" w\\"\\"ith quotes\\""'))
// .toBe('st"ring" w""ith quotes"');

// expect(deserialize_simd('"string \\"with random spa\\nces and \\nnewlines\\n\\n\\n"'))
// .toBe('string "with random spa\nces and \nnewlines\n\n\n');
// // expect(deserialize_simd('"string \\"with random spa\\nces and \\nnewlines\\n\\n\\n"'))
// // .toBe('string "with random spa\nces and \nnewlines\n\n\n');

// expect(deserialize_simd('"string with colon : comma , brace [ ] bracket { } and quote \\" and other quote \\""'))
// .toBe('string with colon : comma , brace [ ] bracket { } and quote " and other quote "');
});
// // expect(deserialize_simd('"string with colon : comma , brace [ ] bracket { } and quote \\" and other quote \\""'))
// // .toBe('string with colon : comma , brace [ ] bracket { } and quote " and other quote "');
// });

run();
21 changes: 13 additions & 8 deletions assembly/custom/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ export function getArrayDepth<T extends ArrayLike>(depth: i32 = 1): i32 {
@inline export function snip_fast<T extends number>(str: string, len: u32 = 0, offset: u32 = 0): T {
if (isSigned<T>()) {
const firstChar: u32 = load<u16>(changetype<usize>(str));
if (firstChar === 48) return 0 as T;
const isNegative = firstChar === 45; // Check if the number is negative
if (firstChar == 48) return 0 as T;
const isNegative = firstChar == 45; // Check if the number is negative
let val: T = 0 as T;
if (len == 0) len = u32(str.length << 1);
if (isNegative) {
Expand Down Expand Up @@ -156,7 +156,7 @@ export function getArrayDepth<T extends ArrayLike>(depth: i32 = 1): i32 {
}
} else {
const firstChar: u32 = load<u16>(changetype<usize>(str));
if (firstChar === 48) return 0 as T;
if (firstChar == 48) return 0 as T;
let val: T = 0 as T;
if (len == 0) len = u32(str.length << 1);
if (len >= 4) {
Expand Down Expand Up @@ -219,7 +219,7 @@ export function getArrayDepth<T extends ArrayLike>(depth: i32 = 1): i32 {
if (!end) end = start + u32(str.length << 1);
if (isSigned<T>()) {
// Negative path
if (load<u16>(changetype<usize>(str) + <usize>start) === 45) {
if (load<u16>(changetype<usize>(str) + <usize>start) == 45) {
start += 2;
for (; start < end; start += 2) {
val = (val * 10 + (load<u16>(changetype<usize>(str) + <usize>start) - 48)) as T;
Expand Down Expand Up @@ -253,14 +253,14 @@ export function getArrayDepth<T extends ArrayLike>(depth: i32 = 1): i32 {
let val: T = 0;
let offset = 0;
let firstChar = load<u16>(changetype<usize>(str) + <usize>offset);
if (firstChar === 45) {
if (firstChar == 45) {
offset = 2;
}
for (; offset < str.length << 1; offset += 2) {
const char = load<u16>(changetype<usize>(str) + <usize>offset);
if (char === 101 || char === 69) {
if (char == 101 || char == 69) {
const char = load<u16>(changetype<usize>(str) + <usize>(offset += 2));
if (char === 45) {
if (char == 45) {
// @ts-ignore
val /= sciNote<T>(__atoi_fast<T>(str, (offset += 2)));
// @ts-ignore
Expand All @@ -276,7 +276,7 @@ export function getArrayDepth<T extends ArrayLike>(depth: i32 = 1): i32 {
val = (val << 1) + (val << 3) + (char - 48);
// We use load because in this case, there is no need to have bounds-checking
}
if (firstChar === 45) {
if (firstChar == 45) {
val = -val as T;
}
return val;
Expand Down Expand Up @@ -336,4 +336,9 @@ export function getArrayDepth<T extends ArrayLike>(depth: i32 = 1): i32 {
return ((87 + low) << 16) | (87 + high);
}
}
}

/**
 * Rounds `n` up to a power of two using a count-leading-zeros computation.
 *
 * The result is `1` shifted left by `32 - clz(n - 1)`, i.e. by the number of
 * significant bits in `n - 1`. Values that are already a power of two are
 * returned unchanged (e.g. 8 -> 8, 9 -> 16).
 *
 * NOTE(review): `n == 0` makes `n - 1` wrap around, and `n` above 2^31 would
 * need a shift by 32; both cases presumably rely on the target's shift-count
 * masking - confirm that callers never pass such values.
 *
 * @param n value to round up
 * @returns `1 << (32 - clz(n - 1))`
 */
// @ts-ignore: Decorator valid here
@inline export function nextPowerOf2(n: u32): u32 {
  // Number of leading zero bits in the predecessor of `n`.
  const leadingZeros: u32 = clz<u32>(n - 1);
  // Remaining (significant) bit count is the shift that yields the next power of two.
  const shift: u32 = 32 - leadingZeros;
  return 1 << shift;
}
8 changes: 4 additions & 4 deletions assembly/deserialize/simple/array/array.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ export function deserializeArrayArray<T extends unknown[][]>(data: string): T {
//i++;
for (; i < data.length - 1; i++) {
const char = unsafeCharCodeAt(data, i);
if (char === BRACKET_LEFT) {
if (depth === 0) {
if (char == BRACKET_LEFT) {
if (depth == 0) {
lastPos = i;
}
// Shifting is 6% faster than incrementing
depth++;
} else if (char === BRACKET_RIGHT) {
} else if (char == BRACKET_RIGHT) {
depth--;
if (depth === 0) {
if (depth == 0) {
i++;
result.push(JSON.parse<valueof<T>>(data.slice(lastPos, i)));
}
Expand Down
4 changes: 2 additions & 2 deletions assembly/deserialize/simple/array/bool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ import { deserializeBoolean } from "../bool";
let lastPos = 1;
for (let i = 1; i < data.length - 1; i++) {
const char = unsafeCharCodeAt(data, i);
if (char === CHAR_T || char === CHAR_F) {
if (char == CHAR_T || char == CHAR_F) {
lastPos = i;
} else if (char === CHAR_E) {
} else if (char == CHAR_E) {
i++;
result.push(deserializeBoolean(data.slice(lastPos, i)));
}
Expand Down
2 changes: 1 addition & 1 deletion assembly/deserialize/simple/array/float.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { deserializeFloat } from "../float";
let awaitingParse = false;
for (; i < data.length; i++) {
const char = unsafeCharCodeAt(data, i);
if (lastPos === 0 && ((char >= 48 && char <= 57) || char === 45)) {
if (lastPos == 0 && ((char >= 48 && char <= 57) || char == 45)) {
awaitingParse = true;
lastPos = i;
} else if (awaitingParse && (isSpace(char) || char == COMMA || char == BRACKET_RIGHT) && lastPos > 0) {
Expand Down
2 changes: 1 addition & 1 deletion assembly/deserialize/simple/array/integer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { deserializeInteger } from "../integer";
let awaitingParse = false;
for (; i < data.length; i++) {
const char = unsafeCharCodeAt(data, i);
if (lastPos === 0 && ((char >= 48 && char <= 57) || char === 45)) {
if (lastPos == 0 && ((char >= 48 && char <= 57) || char == 45)) {
awaitingParse = true;
lastPos = i;
} else if (awaitingParse && (isSpace(char) || char == COMMA || char == BRACKET_RIGHT) && lastPos > 0) {
Expand Down
8 changes: 4 additions & 4 deletions assembly/deserialize/simple/array/map.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ export function deserializeMapArray<T extends unknown[]>(data: string): T {
let depth: u32 = 0;
for (let pos: u32 = 0; pos < <u32>data.length; pos++) {
const char = unsafeCharCodeAt(data, pos);
if (char === BRACE_LEFT) {
if (depth === 0) {
if (char == BRACE_LEFT) {
if (depth == 0) {
lastPos = pos;
}
depth++;
} else if (char === BRACE_RIGHT) {
} else if (char == BRACE_RIGHT) {
depth--;
if (depth === 0) {
if (depth == 0) {
pos++;
result.push(JSON.parse<valueof<T>>(data.slice(lastPos, pos)));
//lastPos = pos + 2;
Expand Down
8 changes: 4 additions & 4 deletions assembly/deserialize/simple/array/object.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ export function deserializeObjectArray<T extends unknown[]>(data: string): T {
let depth: u32 = 0;
for (let pos: u32 = 0; pos < <u32>data.length; pos++) {
const char = unsafeCharCodeAt(data, pos);
if (char === BRACE_LEFT) {
if (depth === 0) {
if (char == BRACE_LEFT) {
if (depth == 0) {
lastPos = pos;
}
depth++;
} else if (char === BRACE_RIGHT) {
} else if (char == BRACE_RIGHT) {
depth--;
if (depth === 0) {
if (depth == 0) {
pos++;
result.push(JSON.parse<valueof<T>>(data.slice(lastPos, pos)));
//lastPos = pos + 2;
Expand Down
6 changes: 3 additions & 3 deletions assembly/deserialize/simple/array/string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ import { deserializeString } from "../string";
let escaping = false;
for (let i = 1; i < data.length - 1; i++) {
const char = unsafeCharCodeAt(data, i);
if (char === BACK_SLASH && !escaping) {
if (char == BACK_SLASH && !escaping) {
escaping = true;
} else {
if (char === QUOTE && !escaping) {
if (instr === false) {
if (char == QUOTE && !escaping) {
if (instr == false) {
instr = true;
lastPos = i;
} else {
Expand Down
4 changes: 2 additions & 2 deletions assembly/deserialize/simple/bool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import { unsafeCharCodeAt } from "../../custom/util";
const len = end - start;
const ptr = changetype<usize>(data) + <usize>(start << 1);
const firstChar = unsafeCharCodeAt(data, start);
if (len === 4 && firstChar === CHAR_T && load<u64>(ptr) === 28429475166421108) return true;
else if (len === 5 && firstChar === CHAR_F && load<u64>(ptr, 2) === 28429466576093281) return false;
if (len == 4 && firstChar == CHAR_T && load<u64>(ptr) == 28429475166421108) return true;
else if (len == 5 && firstChar == CHAR_F && load<u64>(ptr, 2) == 28429466576093281) return false;
return false; //ERROR(`Expected to find boolean, but found "${data.slice(0, 100)}" instead!`);
}
34 changes: 17 additions & 17 deletions assembly/deserialize/simple/map.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ import { deserializeFloat } from "./float";
let outerLoopIndex = 1;
for (; outerLoopIndex < data.length - 1; outerLoopIndex++) {
const char = unsafeCharCodeAt(data, outerLoopIndex);
if (char === BRACKET_LEFT) {
if (char == BRACKET_LEFT) {
for (let arrayValueIndex = outerLoopIndex; arrayValueIndex < data.length - 1; arrayValueIndex++) {
const char = unsafeCharCodeAt(data, arrayValueIndex);
if (char === BRACKET_LEFT) {
if (char == BRACKET_LEFT) {
depth++;
} else if (char === BRACKET_RIGHT) {
} else if (char == BRACKET_RIGHT) {
depth--;
if (depth === 0) {
if (depth == 0) {
++arrayValueIndex;
map.set(deserializeMapKey<indexof<T>>(key), JSON.parse<valueof<T>>(data.slice(outerLoopIndex, arrayValueIndex)));
outerLoopIndex = arrayValueIndex;
Expand All @@ -34,14 +34,14 @@ import { deserializeFloat } from "./float";
}
}
}
} else if (char === BRACE_LEFT) {
} else if (char == BRACE_LEFT) {
for (let objectValueIndex = outerLoopIndex; objectValueIndex < data.length - 1; objectValueIndex++) {
const char = unsafeCharCodeAt(data, objectValueIndex);
if (char === BRACE_LEFT) {
if (char == BRACE_LEFT) {
depth++;
} else if (char === BRACE_RIGHT) {
} else if (char == BRACE_RIGHT) {
depth--;
if (depth === 0) {
if (depth == 0) {
++objectValueIndex;
map.set(deserializeMapKey<indexof<T>>(key), JSON.parse<valueof<T>>(data.slice(outerLoopIndex, objectValueIndex)));
outerLoopIndex = objectValueIndex;
Expand All @@ -50,15 +50,15 @@ import { deserializeFloat } from "./float";
}
}
}
} else if (char === QUOTE) {
} else if (char == QUOTE) {
let escaping = false;
for (let stringValueIndex = ++outerLoopIndex; stringValueIndex < data.length - 1; stringValueIndex++) {
const char = unsafeCharCodeAt(data, stringValueIndex);
if (char === BACK_SLASH && !escaping) {
if (char == BACK_SLASH && !escaping) {
escaping = true;
} else {
if (char === QUOTE && !escaping) {
if (isKey === false) {
if (char == QUOTE && !escaping) {
if (isKey == false) {
// perf: we can avoid creating a new string here if the key doesn't contain any escape sequences
if (containsCodePoint(data, BACK_SLASH, outerLoopIndex, stringValueIndex)) {
key.reinst(deserializeString(data, outerLoopIndex - 1, stringValueIndex));
Expand All @@ -79,26 +79,26 @@ import { deserializeFloat } from "./float";
escaping = false;
}
}
} else if (char == CHAR_N && unsafeCharCodeAt(data, ++outerLoopIndex) === CHAR_U && unsafeCharCodeAt(data, ++outerLoopIndex) === CHAR_L && unsafeCharCodeAt(data, ++outerLoopIndex) === CHAR_L) {
} else if (char == CHAR_N && unsafeCharCodeAt(data, ++outerLoopIndex) == CHAR_U && unsafeCharCodeAt(data, ++outerLoopIndex) == CHAR_L && unsafeCharCodeAt(data, ++outerLoopIndex) == CHAR_L) {
if (isNullable<valueof<T>>()) {
map.set(deserializeMapKey<indexof<T>>(key), null);
}
isKey = false;
} else if (char === CHAR_T && unsafeCharCodeAt(data, ++outerLoopIndex) === CHAR_R && unsafeCharCodeAt(data, ++outerLoopIndex) === CHAR_U && unsafeCharCodeAt(data, ++outerLoopIndex) === CHAR_E) {
} else if (char == CHAR_T && unsafeCharCodeAt(data, ++outerLoopIndex) == CHAR_R && unsafeCharCodeAt(data, ++outerLoopIndex) == CHAR_U && unsafeCharCodeAt(data, ++outerLoopIndex) == CHAR_E) {
if (isBoolean<valueof<T>>()) {
map.set(deserializeMapKey<indexof<T>>(key), true);
}
isKey = false;
} else if (char === CHAR_F && unsafeCharCodeAt(data, ++outerLoopIndex) === CHAR_A && unsafeCharCodeAt(data, ++outerLoopIndex) === CHAR_L && unsafeCharCodeAt(data, ++outerLoopIndex) === CHAR_S && unsafeCharCodeAt(data, ++outerLoopIndex) === CHAR_E) {
} else if (char == CHAR_F && unsafeCharCodeAt(data, ++outerLoopIndex) == CHAR_A && unsafeCharCodeAt(data, ++outerLoopIndex) == CHAR_L && unsafeCharCodeAt(data, ++outerLoopIndex) == CHAR_S && unsafeCharCodeAt(data, ++outerLoopIndex) == CHAR_E) {
if (isBoolean<valueof<T>>()) {
map.set(deserializeMapKey<indexof<T>>(key), false);
}
isKey = false;
} else if ((char >= 48 && char <= 57) || char === 45) {
} else if ((char >= 48 && char <= 57) || char == 45) {
let numberValueIndex = ++outerLoopIndex;
for (; numberValueIndex < data.length; numberValueIndex++) {
const char = unsafeCharCodeAt(data, numberValueIndex);
if (char === COLON || char === COMMA || char === BRACE_RIGHT || isSpace(char)) {
if (char == COLON || char == COMMA || char == BRACE_RIGHT || isSpace(char)) {
if (isInteger<valueof<T>>()) {
map.set(deserializeMapKey<indexof<T>>(key), deserializeInteger<valueof<T>>(data.slice(outerLoopIndex - 1, numberValueIndex)));
} else if (isFloat<valueof<T>>()) {
Expand Down
Loading

0 comments on commit bd96a29

Please sign in to comment.