From 45a06127e5c8a554b4389d1f685b1559fabf398d Mon Sep 17 00:00:00 2001 From: Jairus Date: Thu, 26 Dec 2024 21:32:56 -0800 Subject: [PATCH] loopless string serialization (for the remainder) --- assembly/__benches__/misc.bench.ts | 8 ++- assembly/serialize/simd/string.ts | 93 ++++++++++++++++++++++++++++-- assembly/test.ts | 6 +- 3 files changed, 96 insertions(+), 11 deletions(-) diff --git a/assembly/__benches__/misc.bench.ts b/assembly/__benches__/misc.bench.ts index 315fa6c..8574639 100644 --- a/assembly/__benches__/misc.bench.ts +++ b/assembly/__benches__/misc.bench.ts @@ -1,8 +1,8 @@ import { deserializeString_SIMD } from "../deserialize/simd/string"; import { serializeString_SIMD } from "../serialize/simd/string"; import { bench } from "as-bench/assembly/index" -const str = '""""""""'; -const str2 = '"\"\"\"\"\"\"\"\""'; +const str = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()\\\"\t\r\f\n\u0000'; +const str2 = '"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()\\\\\\"\\t\\r\\f\\n\\u0000"'; // bench("Serialize String (Simple)", () => { // serializeString(str); @@ -13,11 +13,13 @@ const str2 = '"\"\"\"\"\"\"\"\""'; // bs.reset(); // }); -const out = new ArrayBuffer(16); +const out = new ArrayBuffer(256); bench("Serialize String (SIMD)", () => { + // ~5.07GB/s serializeString_SIMD(str, changetype(out)); }); bench("Deserialize String (SIMD)", () => { + // ~4.03GB/s deserializeString_SIMD(str2, changetype(out)); }); \ No newline at end of file diff --git a/assembly/serialize/simd/string.ts b/assembly/serialize/simd/string.ts index 108bcbc..ab02ccf 100644 --- a/assembly/serialize/simd/string.ts +++ b/assembly/serialize/simd/string.ts @@ -43,6 +43,7 @@ const SPLAT_34 = i16x8.splat(34); /* " */ const SPLAT_92 = i16x8.splat(92); /* \ */ const SPLAT_32 = i16x8.splat(32); /* [ESC] */ +const SPLAT_0 = i16x8.splat(0); /* 0 */ /** * Serializes strings into their JSON counterparts using SIMD operations @@ -91,11 +92,94 @@ export function serializeString_SIMD(src: string, dst: usize): usize { } } - src_ptr += 16; - dst_ptr += 16; + src_ptr += 16; dst_ptr += 16; } - while (src_ptr < src_end) { + let rem = src_end - src_ptr; + + if (rem & 8) { + const block = v128.load64_zero(src_ptr); + v128.store64_lane(dst_ptr, block, 0); + + const backslash_indices = i16x8.eq(block, SPLAT_92); + const quote_indices = i16x8.eq(block, SPLAT_34); + const escape_indices = i16x8.lt_u(block, SPLAT_32); + const zero_indices = i16x8.eq(block, SPLAT_0); + const sieve = v128.and(v128.or(v128.or(backslash_indices, quote_indices), escape_indices), v128.not(zero_indices)); + + let mask = i16x8.bitmask(sieve); + while (mask != 0) { + let lane_index = ctz(mask) << 1; + const dst_offset = dst_ptr + lane_index; + const src_offset = src_ptr + lane_index; + const code = load(src_offset) << 2; + const escaped = load(ESCAPE_TABLE + code); + mask &= mask - 1; + + if ((escaped & 0xFFFF) != BACK_SLASH) { + store(dst_offset, 13511005048209500); + store(dst_offset, escaped, 8); + while (lane_index < 6) { + store(dst_ptr + lane_index, load(src_ptr + lane_index, 2), 12); + lane_index += 2; + } + dst_ptr += 10; + } else { + store(dst_offset, escaped); + + while (lane_index < 6) { + store(dst_ptr + lane_index, load(src_ptr + lane_index, 2), 4); + lane_index += 2; + } + dst_ptr += 2; + } + } + + dst_ptr += 8; src_ptr += 8; + } + if (rem & 4) { + const block = load(src_ptr); + + const codeA = block & 0xFFFF; + const codeB = block >> 16; + + if (codeA == 92 || codeA == 34 || codeA < 32) { + const escaped = load(ESCAPE_TABLE + (codeA << 2)); + + if ((escaped & 0xFFFF) != BACK_SLASH) { + store(dst_ptr, 13511005048209500); + store(dst_ptr, escaped, 8); + dst_ptr += 12; + } else { + store(dst_ptr, escaped); + dst_ptr += 4; + } + + } else { + store(dst_ptr, codeA); + dst_ptr += 2; + } + + if (codeB == 92 || codeB == 34 || codeB < 32) { + const escaped = load(ESCAPE_TABLE + (codeB << 2)); + + if ((escaped & 0xFFFF) != BACK_SLASH) { + store(dst_ptr, 13511005048209500); + store(dst_ptr, escaped, 8); + dst_ptr += 12; + } else { + store(dst_ptr, escaped); + dst_ptr += 4; + } + + } else { + store(dst_ptr, codeB); + dst_ptr += 2; + } + + src_ptr += 4; + } + if (rem & 2) { const code = load(src_ptr); if (code == 92 || code == 34 || code < 32) { const escaped = load(ESCAPE_TABLE + (code << 2)); @@ -104,17 +188,14 @@ export function serializeString_SIMD(src: string, dst: usize): usize { store(dst_ptr, 13511005048209500); store(dst_ptr, escaped, 8); dst_ptr += 12; - src_ptr += 2; } else { store(dst_ptr, escaped); dst_ptr += 4; - src_ptr += 2; } } else { store(dst_ptr, code); dst_ptr += 2; - src_ptr += 2; } } diff --git a/assembly/test.ts b/assembly/test.ts index 965e614..f99ceb6 100644 --- a/assembly/test.ts +++ b/assembly/test.ts @@ -10,13 +10,15 @@ import { JSON } from "."; // public z: T; // } -let a = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000a\u000b\u000c\u000d\u000e\u000f\u000f\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f"; +let a = "12\"345678123456"; const b = "world"; -JSON.stringifyTo(a, a) +let c = "000000000000000000000000000000000000000"; +JSON.stringifyTo(a, c) // console.log(JSON.stringifyTo(a, a)); console.log("A: " + a.toString()); console.log("B: " + b.toString()); +console.log("C: " + c.toString()); // console.log(new Vec3().__SERIALIZE())