diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ce24dbcf3..087057bfe 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -481,7 +481,7 @@ jobs:
         cc: [ clang, gcc ]
         make: [ bmake ]
         debug: [ DEBUG, RELEASE ] # RELEASE=1 is a no-op
-        lang: [ "vm -x v1", "vm -x v2", asm, c, rust, vmc, vmops, go, goasm, llvm ]
+        lang: [ "vm -x v1", "vm -x v2", asm, c, rust, vmc, vmops, go, goasm, llvm, wabt ]
         exclude:
           - os: macos
             cc: gcc # it's clang anyway
@@ -512,6 +512,13 @@ jobs:
         sudo apt-get install golang
         go version
 
+    - name: Dependencies (Ubuntu/Wabt)
+      if: matrix.os == 'ubuntu' && matrix.lang == 'wabt'
+      run: |
+        uname -a
+        sudo apt-get install wabt
+        wat2wasm --version
+
     - name: Fetch build
       uses: actions/cache@v3
       id: cache-build
diff --git a/include/fsm/print.h b/include/fsm/print.h
index ed9699e07..42949b687 100644
--- a/include/fsm/print.h
+++ b/include/fsm/print.h
@@ -36,6 +36,7 @@ enum fsm_print_lang {
 	FSM_PRINT_SH,          /* Shell script (bash dialect) */
 	FSM_PRINT_VMC,         /* ISO C90 code, VM style */
 	FSM_PRINT_VMDOT,       /* Graphviz Dot format, showing VM opcodes */
+	FSM_PRINT_WASM,        /* Wasm text (desugared wat syntax) */
 
 	FSM_PRINT_VMOPS_C,     /* VM opcodes as a datastructure */
 	FSM_PRINT_VMOPS_H,
diff --git a/src/fsm/main.c b/src/fsm/main.c
index 1d60ce5c7..0e2ebef21 100644
--- a/src/fsm/main.c
+++ b/src/fsm/main.c
@@ -168,6 +168,7 @@ lang_name(const char *name)
 		{ "llvm", FSM_PRINT_LLVM },
 		{ "sh", FSM_PRINT_SH },
 		{ "go", FSM_PRINT_GO },
+		{ "wasm", FSM_PRINT_WASM },
 
 		{ "amd64", FSM_PRINT_AMD64_NASM },
 		{ "amd64_att", FSM_PRINT_AMD64_ATT },
diff --git a/src/libfsm/print.c b/src/libfsm/print.c
index cdded5504..7d0f2f3c7 100644
--- a/src/libfsm/print.c
+++ b/src/libfsm/print.c
@@ -322,6 +322,7 @@ fsm_print(FILE *f, const struct fsm *fsm,
 	case FSM_PRINT_SH: print_vm = fsm_print_sh; break;
 	case FSM_PRINT_VMC: print_vm = fsm_print_vmc; break;
 	case FSM_PRINT_VMDOT: print_vm = fsm_print_vmdot; break;
+	case FSM_PRINT_WASM: print_ir = fsm_print_wasm; break;
 
 	case FSM_PRINT_VMOPS_C: print_vm = fsm_print_vmops_c; break;
 	case FSM_PRINT_VMOPS_H: print_vm = fsm_print_vmops_h; break;
diff --git a/src/libfsm/print.h b/src/libfsm/print.h
index b286f2137..b6722a62d 100644
--- a/src/libfsm/print.h
+++ b/src/libfsm/print.h
@@ -87,6 +87,7 @@ vm_print_f fsm_print_llvm;
 vm_print_f fsm_print_rust;
 vm_print_f fsm_print_sh;
 vm_print_f fsm_print_vmc;
+ir_print_f fsm_print_wasm;
 vm_print_f fsm_print_vmdot;
 
 vm_print_f fsm_print_vmops_c;
diff --git a/src/libfsm/print/Makefile b/src/libfsm/print/Makefile
index c0015c899..a6c28fa64 100644
--- a/src/libfsm/print/Makefile
+++ b/src/libfsm/print/Makefile
@@ -17,6 +17,7 @@ SRC += src/libfsm/print/vmasm.c
 SRC += src/libfsm/print/vmc.c
 SRC += src/libfsm/print/vmdot.c
 SRC += src/libfsm/print/vmops.c
+SRC += src/libfsm/print/wasm.c
 
 .for src in ${SRC:Msrc/libfsm/print/*.c}
 CFLAGS.${src} += -I src # XXX: for internal.h
diff --git a/src/libfsm/print/wasm.c b/src/libfsm/print/wasm.c
new file mode 100644
index 000000000..85d456d24
--- /dev/null
+++ b/src/libfsm/print/wasm.c
@@ -0,0 +1,459 @@
+/*
+ * Copyright 2018 Katherine Flavel
+ *
+ * See LICENCE for the full copyright terms.
+ */
+
+/*
+ * NOTE(review): the header names below are reconstructed from the
+ * identifiers this file uses and from the grouping of the include list;
+ * confirm them before merging.
+ */
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include <print/esc.h>
+
+#include <adt/set.h>
+
+#include <fsm/fsm.h>
+#include <fsm/pred.h>
+#include <fsm/walk.h>
+#include <fsm/print.h>
+#include <fsm/options.h>
+
+#include "libfsm/internal.h"
+#include "libfsm/print.h"
+
+#include "ir.h"
+
+/*
+ * Indices of the generated function's locals: the input pointer is the
+ * sole parameter, and the other two come from "(local i32 i32)".
+ *
+ * TODO: centralise indices for locals
+ * why not use $x for names?
+ */
+#define LOCAL_STR   0
+#define LOCAL_CHAR  1
+#define LOCAL_STATE 2
+
+/* Sentinel state number; fsm_print_wasm() rejects it as a state index */
+#define ERROR_STATE ((uint32_t) -1)
+
+/*
+ * Emit instructions storing the destination state `to` in LOCAL_STATE.
+ * Nothing is emitted when `to` is `index`, the state being printed.
+ */
+static void
+transition(FILE *f, unsigned index, unsigned to,
+	const char *indent)
+{
+	// no change
+	if (to == index) {
+		return;
+	}
+
+	// set next state
+	fprintf(f, "%si32.const %u\n", indent, to);
+	fprintf(f, "%slocal.set %u\n", indent, LOCAL_STATE);
+}
+
+/*
+ * Emit an i32.const for the byte `c`. When opt->comments is set, the
+ * escaped character follows as a wat comment.
+ */
+static void
+print_endpoint(FILE *f, const struct fsm_options *opt, unsigned char c)
+{
+	assert(f != NULL);
+	assert(opt != NULL);
+
+	fprintf(f, " i32.const %u", (unsigned) c);
+
+	if (opt->comments) {
+		fprintf(f, " ;; \'");
+		json_escputc(f, opt, c);
+		fprintf(f, "\'");
+	}
+
+	fprintf(f, "\n");
+}
+
+/*
+ * Emit a test of the current input byte against `range`, leaving a
+ * boolean (i32) on the stack.
+ *
+ * A wasm comparison takes the first value pushed as its left operand,
+ * so the input byte is pushed before each endpoint to give
+ * "byte >= start" and "byte <= end".
+ */
+static void
+print_range(FILE *f, const struct fsm_options *opt,
+	const struct ir_range *range)
+{
+	assert(f != NULL);
+	assert(opt != NULL);
+	assert(range != NULL);
+
+	if (range->end == range->start) {
+		fprintf(f, " local.get %u\n", LOCAL_CHAR); // get current input byte
+		print_endpoint(f, opt, range->start);
+		fprintf(f, " i32.eq\n");
+		return;
+	}
+
+	fprintf(f, " local.get %u\n", LOCAL_CHAR); // get current input byte
+	print_endpoint(f, opt, range->start);
+	fprintf(f, " i32.ge_u\n");
+
+	fprintf(f, " local.get %u\n", LOCAL_CHAR); // get current input byte
+	print_endpoint(f, opt, range->end);
+	fprintf(f, " i32.le_u\n");
+
+	fprintf(f, " i32.and\n");
+}
+
+/*
+ * Emit a test of the current input byte against any of `ranges`,
+ * leaving a single boolean (i32) on the stack.
+ *
+ * `complete` does not affect the output: an "if" that yields a result
+ * needs an else arm whether or not the caller's groups are complete.
+ */
+static void
+print_ranges(FILE *f, const struct fsm_options *opt,
+	const struct ir_range *ranges, size_t n,
+	bool complete)
+{
+	size_t k;
+
+	assert(f != NULL);
+	assert(opt != NULL);
+	assert(ranges != NULL);
+	assert(n > 0);
+
+	(void) complete;
+
+	/*
+	 * For multiple ranges, we emit an if/else chain so that we can
+	 * produce a single boolean and bail out on the first match,
+	 * without evaluating every condition.
+	 *
+	 * This is a || b || c with short-circuit evaluation.
+	 * In any case, we end up with a single bool on the stack.
+	 *
+	 * Every range but the last opens an "if (result i32)" whose else
+	 * arm holds the rest of the chain. The last range's own bool is
+	 * the value of the innermost else arm.
+	 */
+
+	/* TODO: would prefer to emit binary search for ranges,
+	 * perhaps with IR support. */
+
+	for (k = 0; k + 1 < n; k++) {
+		print_range(f, opt, &ranges[k]);
+		fprintf(f, " if (result i32)\n");
+		fprintf(f, " i32.const 1 ;; match \n");
+		fprintf(f, " else\n");
+	}
+
+	/* a single range, or the last of several, leaves its own bool */
+	print_range(f, opt, &ranges[n - 1]);
+
+	for (k = 0; k + 1 < n; k++) {
+		fprintf(f, " end\n");
+	}
+}
+
+/*
+ * Emit an if/else chain over `groups`: the first group whose ranges
+ * contain the current input byte transition()s to that group's
+ * destination. When `complete` is false and `default_to` differs from
+ * `index`, a final else arm transitions to `default_to`.
+ *
+ * This leaves nothing on the stack; the transition() is a side effect.
+ * We prefer that to accumulating a destination on the stack, because
+ * we can skip effects where the destination state is unchanged from
+ * the current index.
+ *
+ * The chain can't be avoided for a single group, because we still need
+ * to convert that group's bool to a transition().
+ */
+static void
+print_groups(FILE *f, const struct fsm_options *opt,
+	const struct ir_group *groups, size_t n,
+	unsigned index, bool complete,
+	unsigned default_to)
+{
+	size_t j;
+
+	assert(f != NULL);
+	assert(opt != NULL);
+	assert(groups != NULL);
+	assert(n > 0);
+
+	for (j = 0; j < n; j++) {
+		print_ranges(f, opt, groups[j].ranges, groups[j].n, complete);
+
+		fprintf(f, " if\n");
+		transition(f, index, groups[j].to, " ");
+
+		if (j + 1 < n) {
+			fprintf(f, " else\n");
+		}
+	}
+
+	if (!complete && default_to != index) {
+		fprintf(f, " else\n");
+		transition(f, index, default_to, " ");
+	}
+
+	for (j = 0; j < n; j++) {
+		fprintf(f, " end\n");
+	}
+}
+
+/*
+ * Emit the code for state `index`: close the block that br_table
+ * branches out of to reach this state, update LOCAL_STATE according to
+ * the state's IR strategy, then branch back to the top of the loop.
+ *
+ * `delta` is ir->n - index, as passed by fsm_print_wasm(); the loop is
+ * delta - 1 labels up from this state's code.
+ *
+ * Returns 0 on success. Returns -1 if print_hook_accept() fails, or
+ * with errno set to ENOTSUP for the unimplemented IR_TABLE strategy.
+ */
+static int
+print_state(FILE *f,
+	const struct fsm_options *opt,
+	const struct fsm_hooks *hooks,
+	const struct ir_state *cs,
+	unsigned index,
+	unsigned delta)
+{
+	assert(f != NULL);
+	assert(opt != NULL);
+	assert(hooks != NULL);
+	assert(cs != NULL);
+
+	/* showing hook in addition to existing content */
+	if (cs->isend && hooks->accept != NULL) {
+		if (-1 == print_hook_accept(f, opt, hooks,
+			cs->endids.ids, cs->endids.count,
+			NULL, NULL))
+		{
+			return -1;
+		}
+	}
+
+	fprintf(f, " end\n");
+	fprintf(f, "\n");
+
+	fprintf(f, " ;; S%u", index);
+	if (cs->example != NULL) {
+		fprintf(f, " \"");
+		escputs(f, opt, json_escputc, cs->example);
+		fprintf(f, "\"");
+	}
+	fprintf(f, "\n");
+
+	switch (cs->strategy) {
+	case IR_NONE:
+		fprintf(f, " ;; IR_NONE\n");
+		break;
+
+	case IR_SAME:
+		fprintf(f, " ;; IR_SAME\n");
+		transition(f, index, cs->u.same.to, " ");
+		break;
+
+	case IR_COMPLETE:
+		fprintf(f, " ;; IR_COMPLETE\n");
+		print_groups(f, opt, cs->u.complete.groups, cs->u.complete.n, index, true, ERROR_STATE);
+		break;
+
+	case IR_PARTIAL:
+		fprintf(f, " ;; IR_PARTIAL\n");
+		print_groups(f, opt, cs->u.partial.groups, cs->u.partial.n, index, false, ERROR_STATE);
+		fprintf(f, "\n");
+		break;
+
+	case IR_DOMINANT:
+		fprintf(f, " ;; IR_DOMINANT\n");
+		print_groups(f, opt, cs->u.dominant.groups, cs->u.dominant.n, index, false, cs->u.dominant.mode);
+		break;
+
+	case IR_ERROR:
+		fprintf(f, " ;; IR_ERROR\n");
+		print_ranges(f, opt, cs->u.error.error.ranges, cs->u.error.error.n, false);
+		fprintf(f, " if\n");
+		transition(f, index, ERROR_STATE, " ");
+
+		/*
+		 * NOTE(review): the groups are printed as complete, so
+		 * u.error.mode is never emitted as a destination, and
+		 * nothing is emitted when there are no groups. Confirm
+		 * that is the intended handling of the mode.
+		 */
+		if (cs->u.error.n > 0) {
+			fprintf(f, " else\n");
+			print_groups(f, opt, cs->u.error.groups, cs->u.error.n, index, true, cs->u.error.mode);
+		}
+
+		fprintf(f, " end\n");
+		break;
+
+	case IR_TABLE:
+		/*
+		 * TODO: would emit br_table here. Until then, fail rather
+		 * than emit code that ignores the input byte when
+		 * assertions are compiled out.
+		 */
+		assert(!"unreached");
+		errno = ENOTSUP;
+		return -1;
+
+	default:
+		;
+	}
+
+	fprintf(f, " br %u ;; continue the loop, %u block%s up\n",
+		delta - 1, delta - 1, delta - 1 == 1 ? "" : "s");
+
+	return 0;
+}
+
+/*
+ * Print `ir` as a WebAssembly text module containing one function.
+ * The function takes the address of a NUL-terminated string in linear
+ * memory and steps the state machine over it a byte at a time, until
+ * the terminator is read or the state becomes ERROR_STATE.
+ *
+ * NOTE: the generated function always returns 1 for now; see the
+ * retlist TODO below.
+ *
+ * Returns 0 on success. Returns -1 if a state fails to print, or with
+ * errno set to EINVAL when a state index collides with ERROR_STATE.
+ */
+int
+fsm_print_wasm(FILE *f,
+	const struct fsm_options *opt,
+	const struct fsm_hooks *hooks,
+	const struct ret_list *retlist,
+	const struct ir *ir)
+{
+	size_t i;
+
+	assert(f != NULL);
+	assert(opt != NULL);
+	assert(hooks != NULL);
+	assert(ir != NULL);
+	assert(retlist != NULL);
+
+	// TODO: prefix etc
+	fprintf(f, "(module\n");
+	fprintf(f, " (memory 1 1)\n");
+	fprintf(f, " (func (param i32) (result i32) (local i32 i32)\n");
+
+	/*
+	 * s is in LOCAL_STR (the parameter) and we keep p there too;
+	 * *p is cached in LOCAL_CHAR.
+	 */
+
+	/*
+	 * The current state is in LOCAL_STATE. Locals are implicitly
+	 * initialized to 0, so only a nonzero start state needs setting.
+	 * This must come before the loop: set inside it, every input byte
+	 * would restart the machine from the start state.
+	 */
+	if (ir->start != 0) {
+		fprintf(f, " i32.const %u\n", (unsigned) ir->start);
+		fprintf(f, " local.set %u\n", LOCAL_STATE);
+		fprintf(f, "\n");
+	}
+
+	/* for (p = s; *p != '\0'; p++) */
+	fprintf(f, " block\n"); // introduce a branch target for getting out of the loop
+	fprintf(f, " loop\n"); // begin the outer loop
+	fprintf(f, "\n");
+
+	fprintf(f, " local.get %u\n", LOCAL_STATE);
+	fprintf(f, " i32.const %u\n", (unsigned) ERROR_STATE);
+	fprintf(f, " i32.eq\n"); // test if the current state is the error state
+	fprintf(f, " br_if 1\n"); // exit the outer block if so
+	fprintf(f, "\n");
+
+	/* fetch *p */
+	fprintf(f, " local.get %u\n", LOCAL_STR); // get address of next byte
+	fprintf(f, " i32.load8_u 0\n"); // load byte at that address
+	fprintf(f, " local.tee %u\n", LOCAL_CHAR); // save the current input byte and keep it on the stack
+	fprintf(f, "\n");
+
+	/* *p != '\0' */
+	fprintf(f, " i32.eqz\n"); // test if the byte is zero
+	fprintf(f, " br_if 1\n"); // exit the outer block if so
+	fprintf(f, "\n");
+
+	/* p++ */
+	fprintf(f, " local.get %u\n", LOCAL_STR);
+	fprintf(f, " i32.const 1\n");
+	fprintf(f, " i32.add\n");
+	fprintf(f, " local.set %u\n", LOCAL_STR);
+	fprintf(f, "\n");
+
+	/*
+	 * switch (state)
+	 * We need a block for each state: we start with a jump table
+	 * that branches out of the block which ends before the code we
+	 * want to run.
+	 */
+	for (i = 0; i < ir->n; i++) {
+		fprintf(f, " block ;; S%zu\n", i);
+	}
+	fprintf(f, " local.get %u\n", LOCAL_STATE);
+	fprintf(f, " br_table");
+	for (i = 0; i < ir->n; i++) {
+		fprintf(f, " %zu", i);
+	}
+	fprintf(f, "\n");
+
+	for (i = 0; i < ir->n; i++) {
+		if (i == ERROR_STATE) {
+			errno = EINVAL;
+			return -1;
+		}
+
+		if (-1 == print_state(f, opt, hooks, &ir->states[i], i, ir->n - i)) {
+			return -1;
+		}
+	}
+
+	fprintf(f, " end\n"); // end of loop
+	fprintf(f, " end\n"); // end of outer block
+	fprintf(f, "\n");
+
+	// TODO: if no_state, error
+
+	/*
+	 * TODO: use retlist instead
+	 * need to index state -> retlist entry, maybe index into data for that
+	 * seems better than scattering it throughout the code as a local maybe
+	 */
+
+	fprintf(f, " ;; return 1\n");
+	fprintf(f, " i32.const 1\n");
+	fprintf(f, " return\n");
+	fprintf(f, " )\n");
+	fprintf(f, ")\n");
+
+	return 0;
+}
+
diff --git a/src/re/main.c b/src/re/main.c
index 0cf7a8dfd..033246b5c 100644
--- a/src/re/main.c
+++ b/src/re/main.c
@@ -124,6 +124,7 @@ lang_name(const char *name, enum fsm_print_lang *fsm_lang, enum ast_print_lang *
 		{ "rust", FSM_PRINT_RUST },
 		{ "sh", FSM_PRINT_SH },
 		{ "vmc", FSM_PRINT_VMC },
+		{ "wasm", FSM_PRINT_WASM },
 		{ "vmdot", FSM_PRINT_VMDOT },
 
 		{ "vmops_c", FSM_PRINT_VMOPS_C },
diff --git a/src/rx/main.c b/src/rx/main.c
index 0328f900a..0c96ee4d8 100644
--- a/src/rx/main.c
+++ b/src/rx/main.c
@@ -713,6 +713,7 @@ print_name(const char *name, enum fsm_ambig ambig)
 		{ "irjson", FSM_PRINT_IRJSON, AMBIG_ANY },
 		{ "llvm", FSM_PRINT_LLVM, AMBIG_ANY },
 		{ "vmdot", FSM_PRINT_VMDOT, AMBIG_ANY },
+		{ "wasm", FSM_PRINT_WASM, AMBIG_ANY },
 		{ "vmops_c", FSM_PRINT_VMOPS_C, AMBIG_ANY },
 		{ "vmops_h", FSM_PRINT_VMOPS_H, AMBIG_ANY },
 		{ "vmops_main", FSM_PRINT_VMOPS_MAIN, AMBIG_ANY },