Skip to content

Commit

Permalink
properly escape 0x0B (vertical tab) in strings
Browse files Browse the repository at this point in the history
  • Loading branch information
JairusSW committed Aug 10, 2024
1 parent b0ca099 commit 83e37d8
Show file tree
Hide file tree
Showing 17 changed files with 256 additions and 305 deletions.
3 changes: 1 addition & 2 deletions asconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,5 @@
"options": {
"transform": ["./transform", "as-test/transform"],
"disableWarning": [226]
},
"extends": "./node_modules/@assemblyscript/wasi-shim/asconfig.json"
}
}
1 change: 1 addition & 0 deletions assembly/__benches__/as-tral.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/// <reference types="@as-tral/cli/as-tral" />
14 changes: 14 additions & 0 deletions assembly/__benches__/bool.bench.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// import { bs } from "../custom/bs";
// import { serializeBool, serializeBool_BS } from "../serialize/bool"

// const out = memory.data(65536);

// bench("Serialize Bool", () => {
// blackbox<string>(serializeBool(true));
// });

// bench("Serialize Bool BS", () => {
// serializeBool_BS(true);
// bs._out(out);
// bs.reset();
// });
36 changes: 36 additions & 0 deletions assembly/__benches__/simd.bench.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// function esc_128(data: string): bool {
// let len = data.length;

// let running = v128.splat<i64>(0);
// let i = 0;

// while (i + 15 < len) {
// let w = v128.load(changetype<usize>(data));
// running = v128.or(running, v128.eq<i16>(w, i16x8.splat(34)));
// running = v128.or(running, v128.eq<i16>(w, i16x8.splat(92)));

// const subtracted = v128.sub<i16>(w, i8x16.splat(31));
// running = v128.or(running, v128.eq<i16>(subtracted, v128.splat<i64>(0)));
// i += 16;
// }

// return v128.any_true(running);
// }

// function esc_16(data: string): bool {
// let len = data.length;
// let b: u16 = 0;
// while (len--) {
// const c = load<u16>(changetype<usize>(data) + (len << 1));
// b |= u16(c < 32) | u16(c == 34) | u16(c == 92);
// }
// return bool(b);
// }

// bench("needs escaping 128", () => {
// blackbox<bool>(esc_128("hel\"o !!"));
// })

// bench("needs escaping 16", () => {
// blackbox<bool>(esc_16("hel\"o !!"));
// })
18 changes: 18 additions & 0 deletions assembly/__benches__/string.bench.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { bs } from "../custom/bs";
import { serializeBool, serializeBool_BS } from "../serialize/bool"
import { serialize_simd, serializeString, serializeString_BS } from "../serialize/string";

// Destination pointer passed to serialize_simd in the last benchmark below.
// NOTE(review): presumably a 65536-byte static region reserved by
// AssemblyScript's memory.data — confirm it covers the serializer's worst-case output.
const out = memory.data(65536);

// Benchmarks the string-returning serializer on a 16-character input that
// contains one double quote. The result is handed to blackbox
// (presumably so the call cannot be optimized away — confirm against as-tral).
bench("Serialize String", () => {
blackbox<string>(serializeString("hello \"world abc"));
});

// Benchmarks the bs-buffer serializer on the same input; bs.reset() runs
// after every serialization inside the measured closure.
bench("Serialize String BS", () => {
serializeString_BS("hello \"world abc");
bs.reset();
});

// Benchmarks the SIMD serializer on the same input, writing to `out`.
// Unlike the first benchmark, nothing is passed through blackbox here.
bench("Serialize String SIMD", () => {
serialize_simd("hello \"world abc", out);
})
5 changes: 5 additions & 0 deletions assembly/custom/bs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ export namespace bs {
POINTER += 16;
if (MAX_CACHE <= POINTER) bs.shrink();
}
/**
 * Stores all 16 bytes of `chars` at `POINTER`, then moves `POINTER` forward
 * by `n` bytes only, so a following write begins `n` bytes after this one.
 * Calls `bs.shrink()` once `POINTER` has reached or passed `MAX_CACHE`.
 *
 * @param chars vector stored in full at the current `POINTER`
 * @param n number of bytes `POINTER` is advanced by
 */
@inline export function write_128_n(chars: v128, n: usize): void {
  v128.store(POINTER, chars);
  POINTER = POINTER + n;
  if (POINTER >= MAX_CACHE) {
    bs.shrink();
  }
}
@inline export function write_128_u(chars: v128): void {
store<v128>(POINTER, chars);
//POINTER += 16;
Expand Down
39 changes: 0 additions & 39 deletions assembly/custom/itoa.ts

This file was deleted.

13 changes: 13 additions & 0 deletions assembly/serialize/bool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,20 @@
* @param data data to serialize
* @returns string
*/

import { bs } from "../custom/bs";

/**
 * Serializes a boolean to its JSON text.
 * @param data value to serialize
 * @returns `"true"` when `data` is true, otherwise `"false"`
 */
// @ts-ignore: Decorator valid here
@inline export function serializeBool(data: bool): string {
  if (data) {
    return "true";
  }
  return "false";
}

/**
 * Appends the JSON text of a boolean to the `bs` buffer.
 *
 * Each 64-bit constant packs four UTF-16 code units with the first
 * character in the low 16 bits:
 *   28429475166421108 = 0x0065_0075_0072_0074 ('t','r','u','e')
 *   32370086184550502 = 0x0073_006C_0061_0066 ('f','a','l','s')
 *
 * @param data value to serialize
 */
@inline export function serializeBool_BS(data: bool): void {
  if (data !== true) {
    // Five code units: one 64-bit write for "fals", then a 16-bit write for "e".
    // Single-store alternative left by the author:
    //bs.write_128_n(i16x8(102, 97, 108, 115, 101, 0, 0, 0), 10);
    bs.write_64(32370086184550502); /* fals */
    bs.write_16(101); /* e */
    return;
  }
  bs.write_64(28429475166421108); /* true */
}
162 changes: 157 additions & 5 deletions assembly/serialize/string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,157 @@ import {
} from "../custom/chars";
import { OBJECT, TOTAL_OVERHEAD } from "rt/common";
import { bs } from "../custom/bs";
import { _intTo16, intTo16 } from "../custom/util";
import { _intTo16, intTo16, unsafeCharCodeAt } from "../custom/util";
import { Sink } from "../custom/sink";

/**
 * Reports whether `data` contains any UTF-16 code unit that JSON requires to
 * be escaped inside a string literal: a control character (below U+0020),
 * a double quote (U+0022) or a backslash (U+005C).
 *
 * Every code unit of the string is examined and nothing is read past
 * `data.length`, so the empty string and strings shorter than one 128-bit
 * vector are handled correctly.
 *
 * @param data string to inspect
 * @returns true if at least one code unit must be escaped, false otherwise
 */
function needsEscaping(data: string): bool {
  const len = data.length;
  for (let i = 0; i < len; i++) {
    const code = data.charCodeAt(i);
    // 34 = '"', 92 = '\\'; everything below 32 is a control character.
    if (code < 32 || code === 34 || code === 92) {
      return true;
    }
  }
  return false;
}

/**
 * Writes one UTF-16 code unit to `dst_ptr` in JSON-escaped form.
 *
 * - `"` and `\` are prefixed with a backslash (2 units).
 * - 8, 9, 10, 12, 13 use the short forms `\b \t \n \f \r` (2 units).
 * - Any other unit below 32 becomes `\u00XX` with lower-case hex (6 units).
 * - Everything else is copied unchanged (1 unit).
 *
 * @param code code unit to write
 * @param dst_ptr address to write at
 * @returns the address immediately after the last byte written
 */
function write_escaped_unit(code: u16, dst_ptr: usize): usize {
  if (code === 34 || code === 92) {
    store<u16>(dst_ptr, 92); /* \ */
    store<u16>(dst_ptr, code, 2);
    return dst_ptr + 4;
  }
  if (code >= 32) {
    store<u16>(dst_ptr, code);
    return dst_ptr + 2;
  }

  // Control character: pick the short escape letter if one exists.
  let short_form: u16 = 0;
  switch (code) {
    case 8:
      short_form = 98; /* b */
      break;
    case 9:
      short_form = 116; /* t */
      break;
    case 10:
      short_form = 110; /* n */
      break;
    case 12:
      short_form = 102; /* f */
      break;
    case 13:
      short_form = 114; /* r */
      break;
  }

  store<u16>(dst_ptr, 92); /* \ */
  if (short_form !== 0) {
    store<u16>(dst_ptr, short_form, 2);
    return dst_ptr + 4;
  }

  // \u00XX — code is below 32 here, so the high nibble is 0 or 1.
  const hi: u16 = code >> 4;
  const lo: u16 = code & 15;
  store<u16>(dst_ptr, 117, 2); /* u */
  store<u16>(dst_ptr, 48, 4); /* 0 */
  store<u16>(dst_ptr, 48, 6); /* 0 */
  store<u16>(dst_ptr, <u16>(48 + hi), 8);
  store<u16>(dst_ptr, <u16>(lo < 10 ? 48 + lo : 87 + lo), 10); /* 0-9 / a-f */
  return dst_ptr + 12;
}

/**
 * Serializes `src` as a JSON string literal (UTF-16) into raw memory at `dst`.
 *
 * The input is scanned eight code units at a time. A block that contains no
 * quote, backslash or control character is copied with a single vector
 * store; a block that contains any of them is written unit by unit through
 * `write_escaped_unit`. The remainder that does not fill a whole block is
 * always written unit by unit, so no bytes past the end of `src` are read.
 *
 * The output starts and ends with `"`. No terminator is appended and the
 * output length is not returned.
 *
 * @param src string to serialize
 * @param dst address of the output buffer; the caller must provide at least
 *            `12 * src.length + 4` bytes (every unit may expand to six units,
 *            plus the two quotes)
 */
// @ts-ignore: Decorator
@inline export function serialize_simd(src: string, dst: usize): void {
  let src_ptr = changetype<usize>(src);
  const src_end = src_ptr + (<usize>src.length << 1);
  let dst_ptr = dst;

  store<u16>(dst_ptr, 34); /* " */
  dst_ptr += 2;

  // Vector path: entered only while a full 16-byte block is in bounds.
  while (src_ptr + 16 <= src_end) {
    const block = v128.load(src_ptr);
    const quote_lanes = i16x8.eq(block, i16x8.splat(34));
    const backslash_lanes = i16x8.eq(block, i16x8.splat(92));
    const control_lanes = i16x8.lt_u(block, i16x8.splat(32));
    const special_lanes = v128.or(control_lanes, v128.or(quote_lanes, backslash_lanes));

    if (v128.any_true(special_lanes)) {
      // Escapes change the output width, so this block is emitted per unit.
      const block_end = src_ptr + 16;
      while (src_ptr < block_end) {
        dst_ptr = write_escaped_unit(load<u16>(src_ptr), dst_ptr);
        src_ptr += 2;
      }
    } else {
      v128.store(dst_ptr, block);
      src_ptr += 16;
      dst_ptr += 16;
    }
  }

  // Tail: fewer than eight code units remain.
  while (src_ptr < src_end) {
    dst_ptr = write_escaped_unit(load<u16>(src_ptr), dst_ptr);
    src_ptr += 2;
  }

  store<u16>(dst_ptr, 34); /* " */
}

/**
 * Debug helper: logs the eight UTF-16 code units starting at `src_ptr` as
 * space-separated characters, then logs bits 0..7 of `mask` on the next
 * line so that bit `i` lines up with character `i`.
 *
 * @param src_ptr address of the first of eight code units to print
 * @param mask bitmask whose low eight bits are printed
 */
function vis(src_ptr: usize, mask: i32): void {
  let charLine = "";
  let bitLine = "";
  let lane = 0;
  while (lane < 8) {
    const unit = load<u16>(src_ptr + (lane << 1));
    const flag = (mask >> lane) & 1;
    charLine += String.fromCharCode(unit) + " ";
    bitLine += flag.toString() + " ";
    lane++;
  }
  console.log(charLine);
  console.log(bitLine);
}
/**
 * Serializes a string as a JSON string literal.
 *
 * - The empty string yields `""` without inspecting any character data.
 * - Strings for which `needsEscaping` reports nothing to escape are wrapped
 *   in quotes and returned as-is; this fast path is only correct if
 *   `needsEscaping` flags every quote, backslash and control character.
 * - Otherwise the string is copied into a `Sink` in runs of unescaped
 *   characters, with `"` and `\` prefixed by a backslash, 8/9/10/12/13
 *   written as `\b \t \n \f \r`, and every other character below 32
 *   (including 0x0B) written as a four-digit `\u00XX` escape.
 *
 * @param data string to serialize
 * @returns the quoted, escaped JSON representation of `data`
 */
// @ts-ignore: Decorator
@inline export function serializeString(data: string): string {
  if (data.length === 0) {
    return "\"\"";
  }
  if (!needsEscaping(data)) {
    return "\"" + data + "\"";
  }

  let result = Sink.fromString("\"");

  // `last` is the index of the first character not yet copied to `result`.
  let last: i32 = 0;
  for (let i = 0; i < data.length; i++) {
    const char = unsafeCharCodeAt(data, i);
    if (char === 34 || char === 92) {
      // Flush the pending run, emit the backslash, and leave the character
      // itself at the start of the next run so it is copied verbatim.
      result.write(data, last, i);
      result.writeCodePoint(92);
      last = i;
    } else if (char < 32) {
      // Control character: flush the pending run and replace the character.
      result.write(data, last, i);
      last = i + 1;
      switch (char) {
        case 8: {
          result.write("\\b");
          break;
        }
        case 9: {
          result.write("\\t");
          break;
        }
        case 10: {
          result.write("\\n");
          break;
        }
        case 12: {
          result.write("\\f");
          break;
        }
        case 13: {
          result.write("\\r");
          break;
        }
        default: {
          // toString(16) gives one digit below 16 and two digits from 16 to
          // 31, so the prefix supplies the remaining leading zeros.
          result.write(char < 16 ? "\\u000" : "\\u00");
          result.write(char.toString(16));
          break;
        }
      }
    }
  }
  result.write(data, last);
  result.writeCodePoint(34);
  return result.toString();
}
// @ts-ignore: Decorator valid here
@inline export function serializeString_BS(data: string): void {
Expand All @@ -26,6 +171,7 @@ import { _intTo16, intTo16 } from "../custom/util";

bs.write_16(QUOTE);


let last: i32 = 0;
for (let i = 0; i < len; i += 2) {
const char = load<u16>(changetype<usize>(data) + i);
Expand Down Expand Up @@ -83,6 +229,12 @@ import { _intTo16, intTo16 } from "../custom/util";
last = i;
}
}
bs.write_s_se_u(<string>data, last, changetype<OBJECT>(changetype<usize>(data) - TOTAL_OVERHEAD).rtSize);
bs.write_16(QUOTE);

if (last === 0) {
bs.write_s(data);
bs.write_16(QUOTE)
} else {
bs.write_s_se(<string>data, last, changetype<OBJECT>(changetype<usize>(data) - TOTAL_OVERHEAD).rtSize);
bs.write_16(QUOTE);
}
}
11 changes: 7 additions & 4 deletions assembly/test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import { bs } from "./custom/bs";
import { serialize_simd } from "./serialize/string";

// Scratch output buffer for the serializer.
const out = new ArrayBuffer(1024);

// serialize_simd takes its destination as a raw address (usize), so the
// buffer reference is converted explicitly.
serialize_simd("hello \"world abc", changetype<usize>(out));

// Decodes the whole buffer; serialize_simd does not report how many bytes it wrote.
console.log(String.UTF16.decode(out));
2 changes: 1 addition & 1 deletion bench.js
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ const bench = new Bench({ time: 1000 })
})
*/
.add("Parse String", () => {
data = JSON.parse("[[],[[]],[[],[[]]]]");
data = JSON.stringify("hello \"world abc");
})
.todo("unimplemented .add");

Expand Down
17 changes: 0 additions & 17 deletions bench/bench-node.js

This file was deleted.

Loading

0 comments on commit 83e37d8

Please sign in to comment.