From eedf240e35e5f1e1e3c687cbf6d76ddf65923852 Mon Sep 17 00:00:00 2001 From: Kate F Date: Tue, 3 Sep 2024 12:24:18 +0100 Subject: [PATCH] First cut at wasm codegen. --- .github/workflows/ci.yml | 9 +- include/fsm/print.h | 1 + src/fsm/main.c | 1 + src/libfsm/print.c | 1 + src/libfsm/print.h | 1 + src/libfsm/print/Makefile | 1 + src/libfsm/print/wasm.c | 397 ++++++++++++++++++++++++++++++++++++++ src/re/main.c | 1 + src/retest/main.c | 2 + src/retest/reperf.c | 2 + src/retest/runner.c | 220 ++++++++++++++++++--- src/retest/runner.h | 10 + src/rx/main.c | 1 + 13 files changed, 621 insertions(+), 26 deletions(-) create mode 100644 src/libfsm/print/wasm.c diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dfb6182f2..d6b45ebdb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -481,7 +481,7 @@ jobs: cc: [ clang, gcc ] make: [ bmake ] debug: [ DEBUG, RELEASE ] # RELEASE=1 is a no-op - lang: [ "vm -x v1", "vm -x v2", asm, c, rust, vmc, vmops, go, goasm, llvm ] + lang: [ "vm -x v1", "vm -x v2", asm, c, rust, vmc, vmops, go, goasm, llvm, wasm2c ] exclude: - os: macos cc: gcc # it's clang anyway @@ -512,6 +512,13 @@ jobs: sudo apt-get install golang go version + - name: Dependencies (Ubuntu/Wabt) + if: matrix.os == 'ubuntu' && matrix.lang == 'wasm2c' + run: | + uname -a + sudo apt-get install wabt + go version + - name: Fetch build uses: actions/cache@v4 id: cache-build diff --git a/include/fsm/print.h b/include/fsm/print.h index ed9699e07..42949b687 100644 --- a/include/fsm/print.h +++ b/include/fsm/print.h @@ -36,6 +36,7 @@ enum fsm_print_lang { FSM_PRINT_SH, /* Shell script (bash dialect) */ FSM_PRINT_VMC, /* ISO C90 code, VM style */ FSM_PRINT_VMDOT, /* Graphviz Dot format, showing VM opcodes */ + FSM_PRINT_WASM, /* Wasm text (desugared wat syntax) */ FSM_PRINT_VMOPS_C, /* VM opcodes as a datastructure */ FSM_PRINT_VMOPS_H, diff --git a/src/fsm/main.c b/src/fsm/main.c index 194a18bcd..70ee67bbd 100644 --- a/src/fsm/main.c +++ 
b/src/fsm/main.c @@ -168,6 +168,7 @@ lang_name(const char *name) { "llvm", FSM_PRINT_LLVM }, { "sh", FSM_PRINT_SH }, { "go", FSM_PRINT_GO }, + { "wasm", FSM_PRINT_WASM }, { "amd64", FSM_PRINT_AMD64_NASM }, { "amd64_att", FSM_PRINT_AMD64_ATT }, diff --git a/src/libfsm/print.c b/src/libfsm/print.c index 635311706..5fb402e9a 100644 --- a/src/libfsm/print.c +++ b/src/libfsm/print.c @@ -322,6 +322,7 @@ fsm_print(FILE *f, const struct fsm *fsm, case FSM_PRINT_SH: print_vm = fsm_print_sh; break; case FSM_PRINT_VMC: print_vm = fsm_print_vmc; break; case FSM_PRINT_VMDOT: print_vm = fsm_print_vmdot; break; + case FSM_PRINT_WASM: print_ir = fsm_print_wasm; break; case FSM_PRINT_VMOPS_C: print_vm = fsm_print_vmops_c; break; case FSM_PRINT_VMOPS_H: print_vm = fsm_print_vmops_h; break; diff --git a/src/libfsm/print.h b/src/libfsm/print.h index b286f2137..b6722a62d 100644 --- a/src/libfsm/print.h +++ b/src/libfsm/print.h @@ -87,6 +87,7 @@ vm_print_f fsm_print_llvm; vm_print_f fsm_print_rust; vm_print_f fsm_print_sh; vm_print_f fsm_print_vmc; +ir_print_f fsm_print_wasm; vm_print_f fsm_print_vmdot; vm_print_f fsm_print_vmops_c; diff --git a/src/libfsm/print/Makefile b/src/libfsm/print/Makefile index c0015c899..a6c28fa64 100644 --- a/src/libfsm/print/Makefile +++ b/src/libfsm/print/Makefile @@ -17,6 +17,7 @@ SRC += src/libfsm/print/vmasm.c SRC += src/libfsm/print/vmc.c SRC += src/libfsm/print/vmdot.c SRC += src/libfsm/print/vmops.c +SRC += src/libfsm/print/wasm.c .for src in ${SRC:Msrc/libfsm/print/*.c} CFLAGS.${src} += -I src # XXX: for internal.h diff --git a/src/libfsm/print/wasm.c b/src/libfsm/print/wasm.c new file mode 100644 index 000000000..d776c2336 --- /dev/null +++ b/src/libfsm/print/wasm.c @@ -0,0 +1,397 @@ +/* + * Copyright 2018 Katherine Flavel + * + * See LICENCE for the full copyright terms. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include + +#include "libfsm/internal.h" +#include "libfsm/print.h" + +#include "ir.h" + +// TODO: centralise indices for locals +// why not use $x for names? +#define LOCAL_STR 0 +#define LOCAL_CHAR 1 +#define LOCAL_STATE 2 + +#define ERROR_STATE (uint32_t) -1 + +static void +transition(FILE *f, unsigned index, unsigned to, + const char *indent) +{ + // no change + if (to == index) { + return; + } + + // set next state + fprintf(f, "%si32.const %u\n", indent, to); + fprintf(f, "%slocal.set %u\n", indent, LOCAL_STATE); +} + +static void +print_endpoint(FILE *f, const struct fsm_options *opt, unsigned char c) +{ + assert(f != NULL); + assert(opt != NULL); + + fprintf(f, " i32.const %u", (unsigned char) c); + + if (opt->comments) { + fprintf(f, " ;; \'"); + json_escputc(f, opt, c); + fprintf(f, "\'"); + } + + fprintf(f, "\n"); +} + +static void +print_range(FILE *f, const struct fsm_options *opt, + const struct ir_range *range) +{ + assert(f != NULL); + assert(opt != NULL); + assert(range != NULL); + + // leaves a boolean on the stack + if (range->end == range->start) { + print_endpoint(f, opt, range->start); + fprintf(f, " local.get %u\n", LOCAL_CHAR); // get current input byte + fprintf(f, " i32.eq\n"); + } else { + print_endpoint(f, opt, range->start); + fprintf(f, " local.get %u\n", LOCAL_CHAR); // get current input byte + fprintf(f, " i32.ge_u\n"); + + print_endpoint(f, opt, range->end); + fprintf(f, " local.get %u\n", LOCAL_CHAR); // get current input byte + fprintf(f, " i32.le_u\n"); + + fprintf(f, " i32.and\n"); + } +} + +static void +print_ranges(FILE *f, const struct fsm_options *opt, + const struct ir_range *ranges, size_t n, + bool complete) +{ + size_t k; + + assert(f != NULL); + assert(opt != NULL); + assert(ranges != NULL); + assert(n > 0); + + /* A single range already leaves its own bool on the
stack */ + if (n == 1) { + print_range(f, opt, &ranges[0]); + return; + } + + /* + * For multiple ranges, we emit an if/else chain so that we can + * produce a single boolean and bail out on the first match, + * without evaluating every condition. + * + * This is a || b || c with short-circuit evaluation. + * In any case, we end up with a single bool on the stack. + */ + + /* TODO: would prefer to emit binary search for ranges, + * perhaps with IR support. */ + + for (k = 0; k < n; k++) { + print_range(f, opt, &ranges[k]); + fprintf(f, " if (result i32)\n"); + fprintf(f, " i32.const 1 ;; match \n"); + fprintf(f, " return\n"); + + if (k + 1 < n) { + fprintf(f, " else\n"); + } + } + + if (!complete) { + fprintf(f, " else\n"); + fprintf(f, " i32.const 0 ;; no match \n"); + fprintf(f, " return\n"); + } + + for (k = 0; k < n; k++) { + fprintf(f, " end\n"); + } +} + +static void +print_groups(FILE *f, const struct fsm_options *opt, + const struct ir_group *groups, size_t n, + unsigned index, bool complete, + unsigned default_to) +{ + size_t j; + + assert(f != NULL); + assert(opt != NULL); + assert(groups != NULL); + assert(n > 0); + + /* + * Here leave nothing on the stack, but transition() as side effect. + * + * We prefer this as a side effect rather than accumulating a + * destination on the stack, because we can skip effects for where + * the destination state is unchanged from the current index.
+ */ +// TODO: explain we need another if/else chain for short-circuit evaluation of the groups +// this one doesn't return a value, because each group transition()s as a side effect +// we can't avoid the if/else chain for a single group, because we still need to convert bool to transition() per group + + for (j = 0; j < n; j++) { + print_ranges(f, opt, groups[j].ranges, groups[j].n, complete); + + fprintf(f, " if\n"); + transition(f, index, groups[j].to, " "); + + if (j + 1 < n) { + fprintf(f, " else\n"); + } + } + + if (!complete && default_to != index) { + fprintf(f, " else\n"); + transition(f, index, default_to, " "); + } + + for (j = 0; j < n; j++) { + fprintf(f, " end\n"); + } +} + +static int +print_state(FILE *f, + const struct fsm_options *opt, + const struct fsm_hooks *hooks, + const struct ir_state *cs, + unsigned index, + unsigned delta) +{ + assert(f != NULL); + assert(opt != NULL); + assert(hooks != NULL); + assert(cs != NULL); + + /* showing hook in addition to existing content */ + if (cs->isend && hooks->accept != NULL) { + if (-1 == print_hook_accept(f, opt, hooks, + cs->ids, cs->count, + NULL, NULL)) + { + return -1; + } + } + + fprintf(f, " end\n"); + fprintf(f, "\n"); + + fprintf(f, " ;; S%u", index); + if (cs->example != NULL) { + fprintf(f, " \""); + escputs(f, opt, json_escputc, cs->example); + fprintf(f, "\""); + } + fprintf(f, "\n"); + + switch (cs->strategy) { + case IR_NONE: + fprintf(f, " ;; IR_NONE\n"); + break; + + case IR_SAME: + fprintf(f, " ;; IR_SAME\n"); + transition(f, index, cs->u.same.to, " "); + break; + + case IR_COMPLETE: + fprintf(f, " ;; IR_COMPLETE\n"); + print_groups(f, opt, cs->u.complete.groups, cs->u.complete.n, index, true, ERROR_STATE); + break; + + case IR_PARTIAL: + fprintf(f, " ;; IR_PARTIAL\n"); + print_groups(f, opt, cs->u.partial.groups, cs->u.partial.n, index, false, ERROR_STATE); + fprintf(f, "\n"); + break; + + case IR_DOMINANT: + fprintf(f, " ;; IR_DOMINANT\n"); + print_groups(f, opt,
cs->u.dominant.groups, cs->u.dominant.n, index, false, cs->u.dominant.mode); + break; + + case IR_ERROR: + fprintf(f, " ;; IR_ERROR\n"); + print_ranges(f, opt, cs->u.error.error.ranges, cs->u.error.error.n, false); + fprintf(f, " if\n"); + transition(f, index, ERROR_STATE, " "); + + if (cs->u.error.n > 0) { + fprintf(f, " else\n"); + print_groups(f, opt, cs->u.error.groups, cs->u.error.n, index, true, cs->u.error.mode); + } + + fprintf(f, " end\n"); + break; + + case IR_TABLE: + fprintf(f, " ;; IR_TABLE\n"); + fprintf(f, " local.get %u\n", LOCAL_CHAR); // get current input byte + fprintf(f, " drop\n"); // TODO: do something with it ... + // TODO: would emit br_table here + assert(!"unreached"); + break; + + default: + ; + } + + fprintf(f, " br %u ;; continue the loop, %u block%s up\n", delta - 1, delta - 1, &"s"[delta - 1 == 1]); + + return 0; +} + +int +fsm_print_wasm(FILE *f, + const struct fsm_options *opt, + const struct fsm_hooks *hooks, + const struct ret_list *retlist, + const struct ir *ir) +{ + size_t i; + + assert(f != NULL); + assert(opt != NULL); + assert(hooks != NULL); + assert(ir != NULL); + assert(retlist != NULL); + +// TODO: prefix etc + fprintf(f, "(module\n"); + fprintf(f, " (memory 1 1)\n"); + fprintf(f, " (func (param i32) (result i32) (local i32 i32)\n"); +// fprintf(f, " ;; s is in LOCAL_STR (the parameter) and we'll keep p there too\n"); +// fprintf(f, " ;; we'll cache *p in LOCAL_CHAR\n"); +// fprintf(f, "\n"); + +// fprintf(f, " ;; for (p = s; *p != '\0'; p++)\n"); + fprintf(f, " block\n"); // introduce a branch target for getting out of the loop + fprintf(f, " loop\n"); // begin the outer loop + fprintf(f, "\n"); + + fprintf(f, " local.get %u\n", LOCAL_STATE); + fprintf(f, " i32.const %u\n", ERROR_STATE); + fprintf(f, " i32.eq\n"); // test if the current state is error + fprintf(f, " br_if 1\n"); // exit the outer block if so + fprintf(f, "\n"); + +// fprintf(f, " ;; fetch *p\n"); + fprintf(f, " local.get %u\n", LOCAL_STR); // get
address of next byte + fprintf(f, " i32.load8_u 0\n"); // load byte at that address + fprintf(f, " local.tee %u\n", LOCAL_CHAR); // save the current input byte and keep it on the stack + fprintf(f, "\n"); + +// fprintf(f, " ;; *p != '\0'\n"); + fprintf(f, " i32.eqz\n"); // test if the byte is zero + fprintf(f, " br_if 1\n"); // exit the outer block if so + fprintf(f, "\n"); + +// fprintf(f, " ;; p++\n"); + fprintf(f, " local.get %u\n", LOCAL_STR); + fprintf(f, " i32.const 1\n"); + fprintf(f, " i32.add\n"); + fprintf(f, " local.set %u\n", LOCAL_STR); + fprintf(f, "\n"); + + // the current state will be in LOCAL_STATE + // locals are implicitly initialized to 0 + if (ir->start != 0) { + fprintf(f, " i32.const %u\n", ir->start); + fprintf(f, " local.set %u\n", LOCAL_STATE); + fprintf(f, "\n"); + } + +// fprintf(f, " ;; switch (state)\n"); +// fprintf(f, " ;; we need a block for each state: we'll start with a jump-table that\n"); +// fprintf(f, " ;; branches out of the block which ends before the code we want to run\n"); + for (i = 0; i < ir->n; i++) { + fprintf(f, " block ;; S%zu\n", i); + } + fprintf(f, " local.get %u\n", LOCAL_STATE); + fprintf(f, " br_table"); + for (i = 0; i < ir->n; i++) { + fprintf(f, " %zu", i); + } + fprintf(f, "\n"); + + for (i = 0; i < ir->n; i++) { + if (i == ERROR_STATE) { + errno = EINVAL; + return -1; + } + + if (-1 == print_state(f, opt, hooks, &ir->states[i], i, ir->n - i)) { + return -1; + } + } + + fprintf(f, " end\n"); // end of loop + fprintf(f, " end\n"); // end of outer block + fprintf(f, "\n"); + +// TODO: use retlist +// need to index state -> retlist entry, maybe index into data for that +// seems better than scattering it throughout the code as a local maybe +// +// fprintf(f, "\t\t\t\"end\": %s,\n", cs->isend ? 
"true" : "false"); +// if (cs->isend && cs->count > 0) { +// fprintf(f, "\t\t\t\"end_id\": ["); +// for (size_t i = 0; i < cs->count; i++) { +// fprintf(f, "%u", cs->ids[i]); +// } +// fprintf(f, "],\n"); +// } + +// TODO: can i return from within a block and get rid of LOCAL_STATE? +// "If there are more values that the function's return type specifies, +// then the excess values are popped from the stack and discarded, and the last N values are returned." + fprintf(f, " local.get %u\n", LOCAL_STATE); + fprintf(f, " i32.const %u\n", ERROR_STATE); + fprintf(f, " i32.ne\n"); // true if the current state is not an error + fprintf(f, " return\n"); + fprintf(f, " )\n"); + fprintf(f, ")\n"); + + return 0; +} + diff --git a/src/re/main.c b/src/re/main.c index ff575ca03..553f8fd44 100644 --- a/src/re/main.c +++ b/src/re/main.c @@ -124,6 +124,7 @@ lang_name(const char *name, enum fsm_print_lang *fsm_lang, enum ast_print_lang * { "rust", FSM_PRINT_RUST }, { "sh", FSM_PRINT_SH }, { "vmc", FSM_PRINT_VMC }, + { "wasm", FSM_PRINT_WASM }, { "vmdot", FSM_PRINT_VMDOT }, { "vmops_c", FSM_PRINT_VMOPS_C }, diff --git a/src/retest/main.c b/src/retest/main.c index 53eba216c..12ab7ca85 100644 --- a/src/retest/main.c +++ b/src/retest/main.c @@ -1288,6 +1288,8 @@ main(int argc, char *argv[]) impl = IMPL_VMC; } else if (strcmp(optarg, "vmops") == 0) { impl = IMPL_VMOPS; + } else if (strcmp(optarg, "wasm2c") == 0) { + impl = IMPL_WASM2C; } else { fprintf(stderr, "unknown argument to -l: %s\n", optarg); usage(); diff --git a/src/retest/reperf.c b/src/retest/reperf.c index 4536bcf5f..17c1b57a0 100644 --- a/src/retest/reperf.c +++ b/src/retest/reperf.c @@ -1172,6 +1172,8 @@ main(int argc, char *argv[]) impl = IMPL_VMC; } else if (strcmp(optarg, "vmops") == 0) { impl = IMPL_VMOPS; + } else if (strcmp(optarg, "wasm2c") == 0) { + impl = IMPL_WASM2C; } else { fprintf(stderr, "unknown argument to -l: %s\n", optarg); usage(); diff --git a/src/retest/runner.c b/src/retest/runner.c index 
49cd611af..538010657 100644 --- a/src/retest/runner.c +++ b/src/retest/runner.c @@ -87,13 +87,14 @@ print(const struct fsm *fsm, int e; switch (impl) { - case IMPL_C: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_C); break; - case IMPL_RUST: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_RUST); break; - case IMPL_LLVM: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_LLVM); break; - case IMPL_VMC: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_VMC); break; - case IMPL_GOASM: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_AMD64_GO); break; - case IMPL_VMASM: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_AMD64_ATT); break; - case IMPL_GO: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_GO); break; + case IMPL_C: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_C); break; + case IMPL_RUST: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_RUST); break; + case IMPL_LLVM: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_LLVM); break; + case IMPL_VMC: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_VMC); break; + case IMPL_GOASM: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_AMD64_GO); break; + case IMPL_VMASM: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_AMD64_ATT); break; + case IMPL_GO: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_GO); break; + case IMPL_WASM2C: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_WASM); break; case IMPL_VMOPS: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_VMOPS_H) @@ -241,7 +242,7 @@ compile(enum implementation impl, } case IMPL_VMASM: { - char tmp_o[] = "/tmp/fsmcompile_o-XXXXXX.o"; + char tmp_o[] = "/tmp/fsmcompile-XXXXXX.o"; int fd_o; as = getenv("AS"); @@ -279,6 +280,139 @@ compile(enum implementation impl, break; } + case IMPL_WASM2C: { + char tmp_c[] = "/tmp/fsmcompile-XXXXXX.c"; + int fd_c; + + { + const char *wat2wasm; + const char *wasm2c, *wasm2cflags, *wasmrt_impl; + + wat2wasm = getenv("WAT2WASM"); + + wasm2c = getenv("WASM2C"); + wasm2cflags = getenv("WASM2CFLAGS"); + wasmrt_impl = getenv("WASMRTIMPL"); + + fd_c = xmkstemps(tmp_c); + + if (0 != systemf("%s 
--enable-multi-memory %s -o /dev/stdout | %s %s - > %s", + wat2wasm ? wat2wasm : "wat2wasm", + tmp_src, + wasm2c ? wasm2c : "wasm2c", + wasm2cflags ? wasm2cflags : "", + tmp_c, tmp_src)) + { + return 0; + } + +/* XXX: +Uninitialized bytes in read_sigaction at offset 0 inside [0x7fffb055f280, 8) +==1861==WARNING: MemorySanitizer: use-of-uninitialized-value +*/ + + /* append trampoline */ + { + FILE *f; + + f = fdopen(fd_c, "a"); + if (f == NULL) { + perror(tmp_c); + return 0; + } + + /* trampoline for wabt 1.0.27 */ + + fprintf(f, "#include \n"); + fprintf(f, "#include \n"); + fprintf(f, "\n"); + + fprintf(f, "int retest_trampoline(const char *s) {\n"); + fprintf(f, " size_t n;\n"); + fprintf(f, "\n"); + + fprintf(f, " fsm_init();\n"); + fprintf(f, "\n"); + + /* TODO: we could grow the memory region to size. + * but a page is 64kB, probably enough for our test strings */ + fprintf(f, " n = strlen(s);\n"); + fprintf(f, " if (n + 1 > w2c_M0.size) {\n"); + fprintf(f, " fprintf(stderr, \"overflow\");\n"); + fprintf(f, " abort();\n"); + fprintf(f, " }\n"); + fprintf(f, "\n"); + +/* XXX: placeholder */ + fprintf(f, " memcpy(w2c_M0.data, s, n);\n"); + + /* TODO: would deal with different IO APIs here */ + fprintf(f, " u32 p = 0;\n"); + fprintf(f, " u32 r = w2c_f0(p);\n"); + fprintf(f, "\n"); + + /* TODO: would handle endids here */ + + fprintf(f, " return r;\n"); + fprintf(f, "}\n"); + + if (0 != fclose(f)) { + perror(tmp_c); + return 0; + } + } + + // XXX: hacky + if (0 != systemf("sed -i '/^#include \"wasm\\.h\"/d' %s", tmp_c)) { + return 0; + } + + /* + * wasm2c would also write to src.h when writing out to src.c + * However it doesn't write to src.h when output is stdout. + */ + + /* + * The wasm2c readme: + * + * With GCC 11, adding the command-line arguments + * -fno-optimize-sibling-calls -frounding-math -fsignaling-nans + * appears to be sufficient. + * + * With clang 14, just -fno-optimize-sibling-calls -frounding-math + * appears to be sufficient. 
+ * + * (however -fsignaling-nans is actually not supported for clang) + */ + + if (0 != systemf("%s %s -fPIC -shared %s %s %s %s %s -o %s", + cc ? cc : "gcc", + cflags ? cflags : "", + "-D WASM_RT_MODULE_PREFIX=fsm_ -D WASM_RT_SANITY_CHECKS " + "-D WASM_RT_MEMCHECK_SIGNAL_HANDLER_POSIX", + (cc && (strcmp(cc, "clang") == 0 || strcmp(cc, "gcc") == 0)) + ? "-fno-optimize-sibling-calls -frounding-math" + : "", + (cc && strcmp(cc, "clang") == 0) + ? "" + : "-fsignaling-nans", + wasmrt_impl ? wasmrt_impl : "/usr/src/wasm2c/wasm-rt-impl.c", + tmp_c, tmp_so)) + { + return 0; + } + + /* fd_c implicitly closed by fclose() */ + + if (-1 == unlinkat(-1, tmp_c, 0)) { + perror(tmp_c); + return 0; + } + } + + break; + } + case IMPL_INTERPRET: assert(!"unreached"); break; @@ -294,27 +428,27 @@ runner_init_compiled(struct fsm *fsm, { void *h; - r->impl = impl; - /* The Go compiler needs an extension on tmp_src so it knows * it's a file not a package. Since we're doing that, it's * easier to do the same for everyone. 
*/ - char tmp_src_go[] = "/tmp/fsmcompile_src-XXXXXX.go"; - char tmp_src_c[] = "/tmp/fsmcompile_src-XXXXXX.c"; - char tmp_src_rs[] = "/tmp/fsmcompile_src-XXXXXX.rs"; - char tmp_src_ll[] = "/tmp/fsmcompile_src-XXXXXX.ll"; - char tmp_src_s[] = "/tmp/fsmcompile_src-XXXXXX.s"; + char tmp_src_go[] = "/tmp/fsmcompile_src-XXXXXX.go"; + char tmp_src_c[] = "/tmp/fsmcompile_src-XXXXXX.c"; + char tmp_src_rs[] = "/tmp/fsmcompile_src-XXXXXX.rs"; + char tmp_src_ll[] = "/tmp/fsmcompile_src-XXXXXX.ll"; + char tmp_src_s[] = "/tmp/fsmcompile_src-XXXXXX.s"; + char tmp_src_wat[] = "/tmp/fsmcompile_src-XXXXXX.wat"; char *tmp_src; switch (impl) { case IMPL_VMOPS: case IMPL_C: - case IMPL_VMC: tmp_src = tmp_src_c; break; - case IMPL_RUST: tmp_src = tmp_src_rs; break; - case IMPL_LLVM: tmp_src = tmp_src_ll; break; + case IMPL_VMC: tmp_src = tmp_src_c; break; + case IMPL_RUST: tmp_src = tmp_src_rs; break; + case IMPL_LLVM: tmp_src = tmp_src_ll; break; case IMPL_GOASM: - case IMPL_VMASM: tmp_src = tmp_src_s; break; - case IMPL_GO: tmp_src = tmp_src_go; break; + case IMPL_VMASM: tmp_src = tmp_src_s; break; + case IMPL_GO: tmp_src = tmp_src_go; break; + case IMPL_WASM2C: tmp_src = tmp_src_wat; break; case IMPL_INTERPRET: assert(!"unreached"); @@ -352,11 +486,11 @@ runner_init_compiled(struct fsm *fsm, perror(tmp_so); return ERROR_FILE_IO; } - } - if (-1 == unlinkat(-1, tmp_src, 0)) { - perror(tmp_src); - return 0; + if (-1 == unlinkat(-1, tmp_src, 0)) { + perror(tmp_src); + return 0; + } } /* XXX: depends on IO API */ @@ -397,6 +531,11 @@ runner_init_compiled(struct fsm *fsm, r->u.impl_asm.func = (int (*)(const unsigned char *, size_t)) (uintptr_t) dlsym(h, "fsm_match"); break; + case IMPL_WASM2C: + r->u.impl_wasm2c.h = h; + r->u.impl_wasm2c.func = (int (*)(const unsigned char *)) (uintptr_t) dlsym(h, "retest_trampoline"); + break; + case IMPL_INTERPRET: break; } @@ -416,6 +555,8 @@ fsm_runner_initialize(struct fsm *fsm, const struct fsm_options *opt, *r = zero; + r->impl = impl; + switch 
(impl) { case IMPL_C: case IMPL_LLVM: @@ -425,6 +566,7 @@ fsm_runner_initialize(struct fsm *fsm, const struct fsm_options *opt, case IMPL_VMOPS: case IMPL_GO: case IMPL_GOASM: + case IMPL_WASM2C: return runner_init_compiled(fsm, opt, r, impl); case IMPL_INTERPRET: @@ -433,7 +575,7 @@ fsm_runner_initialize(struct fsm *fsm, const struct fsm_options *opt, fsm_free(fsm); return ERROR_COMPILING_BYTECODE; } - r->impl = impl; + r->u.impl_vm.vm = vm; return ERROR_NONE; } @@ -486,6 +628,12 @@ fsm_runner_finalize(struct fsm_runner *r) } break; + case IMPL_WASM2C: + if (r->u.impl_wasm2c.h != NULL) { + dlclose(r->u.impl_wasm2c.h); + } + break; + default: assert(!"should not reach"); } @@ -524,6 +672,28 @@ fsm_runner_run(const struct fsm_runner *r, const char *s, size_t n) case IMPL_INTERPRET: assert(r->u.impl_vm.vm != NULL); return fsm_vm_match_buffer(r->u.impl_vm.vm, s, n); + + /* + * TODO: other ways we could run wasm, with the goal of avoiding + * a link-time dependency from retest: + * + * wat2wasm --enable-multi-memory -o /dev/stdout %s | wasm-interp - + * wasmtime %s + * + * A wasm trampoline (maybe simple enough to handwrite as .wat) + * would write endids to stdout, redirect to tmp file. + * The result is the exit status code + * + * We cannot use 'wasmtime compile' because despite producing an ELF + * file, the contents aren't executable (there is no runtime), + * it's just using ELF as a container format for internal data. + * + * Could also use w2c2, which is the same approach as wasm2c.
+ */ + + case IMPL_WASM2C: + assert(r->u.impl_wasm2c.func != NULL); + return r->u.impl_wasm2c.func((const unsigned char *) s); } assert(!"should not reach"); diff --git a/src/retest/runner.h b/src/retest/runner.h index 40b269eb0..71b68be98 100644 --- a/src/retest/runner.h +++ b/src/retest/runner.h @@ -35,6 +35,7 @@ enum implementation { IMPL_VMASM, IMPL_VMC, IMPL_VMOPS, + IMPL_WASM2C, }; struct fsm_runner { @@ -70,6 +71,15 @@ struct fsm_runner { struct { struct fsm_dfavm *vm; } impl_vm; + + struct { + const char *tmp_src; + } impl_wat; + + struct { + void *h; + int (*func)(const unsigned char *); + } impl_wasm2c; } u; }; diff --git a/src/rx/main.c b/src/rx/main.c index 0328f900a..0c96ee4d8 100644 --- a/src/rx/main.c +++ b/src/rx/main.c @@ -713,6 +713,7 @@ print_name(const char *name, enum fsm_ambig ambig) { "irjson", FSM_PRINT_IRJSON, AMBIG_ANY }, { "llvm", FSM_PRINT_LLVM, AMBIG_ANY }, { "vmdot", FSM_PRINT_VMDOT, AMBIG_ANY }, + { "wasm", FSM_PRINT_WASM, AMBIG_ANY }, { "vmops_c", FSM_PRINT_VMOPS_C, AMBIG_ANY }, { "vmops_h", FSM_PRINT_VMOPS_H, AMBIG_ANY }, { "vmops_main", FSM_PRINT_VMOPS_MAIN, AMBIG_ANY },