diff --git a/include/fsm/print.h b/include/fsm/print.h index 453d3d915..ed9699e07 100644 --- a/include/fsm/print.h +++ b/include/fsm/print.h @@ -72,6 +72,10 @@ struct fsm_hooks { int (*reject)(FILE *, const struct fsm_options *opt, void *lang_opaque, void *hook_opaque); + int (*comment)(FILE *, const struct fsm_options *opt, + const fsm_end_id_t *ids, size_t count, + void *hook_opaque); + /* only called for AMBIG_ERROR; see opt.ambig */ int (*conflict)(FILE *, const struct fsm_options *opt, const fsm_end_id_t *ids, size_t count, @@ -83,7 +87,17 @@ struct fsm_hooks { /* * Print an FSM to the given file stream. The output is written in the format - * specified. + * specified by the lang enum. + * + * Not all languages support all options, and fsm_print() will fail with ENOTSUP where + * these are not possible. This is different to when an option is possible + * but simply not yet implemented, where fsm_print() will print a message + * to stderr and exit. + * + * The code generation for the typical case of matching input requires the FSM + * to be a DFA, and will fail with EINVAL if the FSM is not a DFA. As opposed to e.g. + * FSM_PRINT_API, which generates code for other purposes, and does not place + * particular expectations on the FSM. * * The output options may be NULL, indicating to use defaults. 
* diff --git a/src/libfsm/print.c b/src/libfsm/print.c index 6ff34e4ae..f8522c678 100644 --- a/src/libfsm/print.c +++ b/src/libfsm/print.c @@ -18,6 +18,7 @@ #include "print.h" #include "internal.h" +#include "vm/retlist.h" #include "vm/vm.h" #include "print/ir.h" @@ -81,6 +82,31 @@ print_hook_accept(FILE *f, return 0; } +int +print_hook_comment(FILE *f, + const struct fsm_options *opt, + const struct fsm_hooks *hooks, + const fsm_end_id_t *ids, size_t count) +{ + assert(f != NULL); + assert(opt != NULL); + assert(hooks != NULL); + + if (opt->ambig == AMBIG_ERROR) { + assert(count <= 1); + } + + if (opt->comments && hooks->comment != NULL) { + /* this space is a polyglot */ + fprintf(f, " "); + + return hooks->comment(f, opt, ids, count, + hooks->hook_opaque); + } + + return 0; +} + int print_hook_reject(FILE *f, const struct fsm_options *opt, @@ -179,6 +205,7 @@ print_conflicts(FILE *f, const struct fsm *fsm, assert(res == 1); // TODO: de-duplicate by ids[], so we don't call the conflict hook an unneccessary number of times + // TODO: now i think this is the same as calling once per retlist entry /* * The conflict hook is called here (rather in the caller), @@ -336,20 +363,37 @@ fsm_print(FILE *f, const struct fsm *fsm, goto done; } + /* + * We're building the retlist here based on the ir. + * I think we could build the retlist earlier instead, + * and then point at the struct ret entries from the ir, + * and then dfavm_compile_ir() would pick those up from there. + * But for now this is good. 
+ */ + struct ret_list retlist; + + if (!build_retlist(&retlist, ir)) { + free_ir(fsm, ir); + goto error; + } + a = zero; /* TODO: non-const a */ - if (!dfavm_compile_ir(&a, ir, vm_opts)) { + if (!dfavm_compile_ir(&a, ir, &retlist, vm_opts)) { + free_retlist(&retlist); free_ir(fsm, ir); return -1; } if (print_vm != NULL) { - r = print_vm(f, opt, hooks, a.linked); + r = print_vm(f, opt, hooks, &retlist, a.linked); } dfavm_opasm_finalize_op(&a); + free_retlist(&retlist); + done: if (ir != NULL) { diff --git a/src/libfsm/print.h b/src/libfsm/print.h index c45fcec53..6fea83274 100644 --- a/src/libfsm/print.h +++ b/src/libfsm/print.h @@ -12,6 +12,7 @@ struct fsm_options; struct fsm_hooks; struct ir; struct dfavm_op_ir; +struct ret_list; int print_hook_args(FILE *f, @@ -31,6 +32,12 @@ print_hook_accept(FILE *f, void *lang_opaque, void *hook_opaque), void *lang_opaque); +int +print_hook_comment(FILE *f, + const struct fsm_options *opt, + const struct fsm_hooks *hooks, + const fsm_end_id_t *ids, size_t count); + int print_hook_reject(FILE *f, const struct fsm_options *opt, @@ -59,6 +66,7 @@ typedef int ir_print_f(FILE *f, typedef int vm_print_f(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops); vm_print_f fsm_print_amd64_att; diff --git a/src/libfsm/print/api.c b/src/libfsm/print/api.c index d7cb70f1f..43b8cb232 100644 --- a/src/libfsm/print/api.c +++ b/src/libfsm/print/api.c @@ -217,7 +217,7 @@ fsm_print_api(FILE *f, } else { fprintf(f, "\tfor (i = 0x%02x; i <= 0x%02x; i++) {", (unsigned int) lo, (unsigned int) hi - 1); - if (rangeclass(lo, hi - 1)) { + if (opt->comments && rangeclass(lo, hi - 1)) { fprintf(f, " /* '%c' .. 
'%c' */", (unsigned char) lo, (unsigned char) hi - 1); } fprintf(f, "\n"); diff --git a/src/libfsm/print/awk.c b/src/libfsm/print/awk.c index de98f95eb..fad52c3ee 100644 --- a/src/libfsm/print/awk.c +++ b/src/libfsm/print/awk.c @@ -26,6 +26,7 @@ #include "libfsm/internal.h" #include "libfsm/print.h" +#include "libfsm/vm/retlist.h" #include "libfsm/vm/vm.h" #define START UINT32_MAX @@ -156,10 +157,21 @@ print_end(FILE *f, const struct dfavm_op_ir *op, return print_hook_reject(f, opt, hooks, default_reject, NULL); case VM_END_SUCC: - return print_hook_accept(f, opt, hooks, - op->endids.ids, op->endids.count, + if (-1 == print_hook_accept(f, opt, hooks, + op->ret->ids, op->ret->count, default_accept, - NULL); + NULL)) + { + return -1; + } + + if (-1 == print_hook_comment(f, opt, hooks, + op->ret->ids, op->ret->count)) + { + return -1; + } + + return 0; default: assert(!"unreached"); @@ -186,6 +198,7 @@ static int fsm_print_awkfrag(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops, const char *cp, const char *prefix) { @@ -194,6 +207,7 @@ fsm_print_awkfrag(FILE *f, assert(f != NULL); assert(opt != NULL); assert(hooks != NULL); + assert(retlist != NULL); assert(cp != NULL); assert(prefix != NULL); @@ -289,6 +303,7 @@ int fsm_print_awk(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops) { const char *prefix; @@ -297,6 +312,7 @@ fsm_print_awk(FILE *f, assert(f != NULL); assert(opt != NULL); assert(hooks != NULL); + assert(retlist != NULL); if (opt->prefix != NULL) { prefix = opt->prefix; @@ -311,7 +327,7 @@ fsm_print_awk(FILE *f, } if (opt->fragment) { - if (-1 == fsm_print_awkfrag(f, opt, hooks, ops, cp, prefix)) { + if (-1 == fsm_print_awkfrag(f, opt, hooks, retlist, ops, cp, prefix)) { return -1; } } else { @@ -333,7 +349,7 @@ fsm_print_awk(FILE *f, fprintf(f, ", l, c) {\n"); - if (-1 == fsm_print_awkfrag(f, opt, 
hooks, ops, cp, prefix)) { + if (-1 == fsm_print_awkfrag(f, opt, hooks, retlist, ops, cp, prefix)) { return -1; } diff --git a/src/libfsm/print/c.c b/src/libfsm/print/c.c index 30a176199..bcea6ab5b 100644 --- a/src/libfsm/print/c.c +++ b/src/libfsm/print/c.c @@ -71,15 +71,17 @@ print_ids(FILE *f, return -1; } - fprintf(f, "return %u;", ids[0]); - break; + /* fallthrough */ case AMBIG_EARLIEST: /* * The libfsm api guarentees these ids are unique, * and only appear once each, and are sorted. */ - fprintf(f, "return %u;", ids[0]); + fprintf(f, "{\n"); + fprintf(f, "\t\t*id = %u;\n", ids[0]); + fprintf(f, "\t\treturn 1;\n"); + fprintf(f, "\t}"); break; case AMBIG_MULTIPLE: @@ -101,7 +103,7 @@ print_ids(FILE *f, fprintf(f, " };\n"); fprintf(f, "\t\t*ids = a;\n"); fprintf(f, "\t\t*count = %zu;\n", count); - fprintf(f, "\t\treturn 0;\n"); + fprintf(f, "\t\treturn 1;\n"); fprintf(f, "\t}"); break; @@ -352,12 +354,18 @@ print_endstates(FILE *f, /* no end states */ if (!ir_hasend(ir)) { - fprintf(f, "\treturn 0; /* unexpected EOT */\n"); + fprintf(f, "\treturn 0;"); + if (opt->comments) { + fprintf(f, " /* unexpected EOT */"); + } + fprintf(f, "\n"); return 0; } /* usual case */ - fprintf(f, "\t/* end states */\n"); + if (opt->comments) { + fprintf(f, "\t/* end states */\n"); + } fprintf(f, "\tswitch (state) {\n"); for (i = 0; i < ir->n; i++) { if (!ir->states[i].isend) { @@ -374,6 +382,12 @@ print_endstates(FILE *f, return -1; } + if (-1 == print_hook_comment(f, opt, hooks, + ir->states[i].endids.ids, ir->states[i].endids.count)) + { + return -1; + } + fprintf(f, "\n"); } @@ -410,7 +424,7 @@ fsm_print_cfrag(FILE *f, const struct ir *ir, fprintf(f, " /* e.g. 
\""); escputs(f, opt, c_escputc_str, ir->states[i].example); fprintf(f, "\" */"); - } else if (i == ir->start) { + } else if (i == ir->start && opt->comments) { fprintf(f, " /* start */"); } } @@ -423,7 +437,11 @@ fsm_print_cfrag(FILE *f, const struct ir *ir, fprintf(f, "\n"); } fprintf(f, "\t\tdefault:\n"); - fprintf(f, "\t\t\t; /* unreached */\n"); + fprintf(f, "\t\t\t;"); + if (opt->comments) { + fprintf(f, " /* unreached */"); + } + fprintf(f, "\n"); fprintf(f, "\t\t}\n"); if (ferror(f)) { @@ -595,7 +613,11 @@ fsm_print_c(FILE *f, } if (ir->n == 0) { - fprintf(f, "\treturn 0; /* no matches */\n"); + fprintf(f, "\treturn 0;"); + if (opt->comments) { + fprintf(f, " /* no matches */"); + } + fprintf(f, "\n"); } else { if (-1 == fsm_print_c_body(f, ir, opt, hooks)) { return -1; diff --git a/src/libfsm/print/dot.c b/src/libfsm/print/dot.c index aeb79b254..f4168e1d3 100644 --- a/src/libfsm/print/dot.c +++ b/src/libfsm/print/dot.c @@ -265,12 +265,22 @@ print_dotfrag(FILE *f, return -1; } + if (opt->comments && hooks->comment != NULL) { + fprintf(f, ","); + + if (-1 == print_hook_comment(f, opt, hooks, + ids, count)) + { + return -1; + } + } + fprintf(f, " ];\n"); f_free(fsm->alloc, ids); } - /* TODO: show example here, unless !opt->comments */ + /* TODO: comment example per state */ if (-1 == print_state(f, opt, hooks, fsm, prefix, s)) { return -1; diff --git a/src/libfsm/print/fsm.c b/src/libfsm/print/fsm.c index cd42e01a3..0c93e4338 100644 --- a/src/libfsm/print/fsm.c +++ b/src/libfsm/print/fsm.c @@ -364,7 +364,13 @@ fsm_print_fsm(FILE *f, if (-1 == print_hook_accept(f, opt, hooks, ids, count, default_accept, - NULL)) + NULL)) + { + return -1; + } + + if (-1 == print_hook_comment(f, opt, hooks, + ids, count)) { return -1; } diff --git a/src/libfsm/print/go.c b/src/libfsm/print/go.c index ee87538a8..50195b69f 100644 --- a/src/libfsm/print/go.c +++ b/src/libfsm/print/go.c @@ -25,6 +25,7 @@ #include "libfsm/internal.h" #include "libfsm/print.h" +#include 
"libfsm/vm/retlist.h" #include "libfsm/vm/vm.h" static const char * @@ -47,7 +48,8 @@ cmp_operator(int cmp) static int print_ids(FILE *f, - enum fsm_ambig ambig, const fsm_end_id_t *ids, size_t count) + enum fsm_ambig ambig, const fsm_end_id_t *ids, size_t count, + size_t i) { switch (ambig) { case AMBIG_NONE: @@ -60,7 +62,7 @@ print_ids(FILE *f, return -1; } - fprintf(f, ", %u;", ids[0]); + fprintf(f, ", %u", ids[0]); break; case AMBIG_EARLIEST: @@ -68,12 +70,12 @@ print_ids(FILE *f, * The libfsm api guarentees these ids are unique, * and only appear once each, and are sorted. */ - fprintf(f, ", %u;", ids[0]); + fprintf(f, ", %u", ids[0]); break; case AMBIG_MULTIPLE: - assert(!"unimplemented"); - abort(); + fprintf(f, ", ret%zu", i); + break; default: assert(!"unreached"); @@ -88,16 +90,19 @@ default_accept(FILE *f, const struct fsm_options *opt, const fsm_end_id_t *ids, size_t count, void *lang_opaque, void *hook_opaque) { + size_t i; + assert(f != NULL); assert(opt != NULL); - assert(lang_opaque == NULL); + assert(lang_opaque != NULL); - (void) lang_opaque; (void) hook_opaque; + i = * (const size_t *) lang_opaque; + fprintf(f, "return true"); - if (-1 == print_ids(f, opt->ambig, ids, count)) { + if (-1 == print_ids(f, opt->ambig, ids, count, i)) { return -1; } @@ -115,7 +120,30 @@ default_reject(FILE *f, const struct fsm_options *opt, (void) lang_opaque; (void) hook_opaque; - fprintf(f, "{\n\t\treturn false\n\t}\n"); + fprintf(f, "{\n\t\treturn false"); + + switch (opt->ambig) { + case AMBIG_NONE: + break; + + case AMBIG_ERROR: + fprintf(f, ", 0"); + break; + + case AMBIG_EARLIEST: + fprintf(f, ", 0"); + break; + + case AMBIG_MULTIPLE: + fprintf(f, ", nil"); + break; + + default: + assert(!"unreached"); + abort(); + } + + fprintf(f, "\n\t}\n"); return 0; } @@ -150,20 +178,32 @@ static int print_end(FILE *f, const struct dfavm_op_ir *op, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, enum dfavm_op_end end_bits) { + 
size_t i; + switch (end_bits) { case VM_END_FAIL: return print_hook_reject(f, opt, hooks, default_reject, NULL); case VM_END_SUCC: + assert(op->ret >= retlist->a); + + i = op->ret - retlist->a; + fprintf(f, "{\n"); fprintf(f, "\t\t"); if (-1 == print_hook_accept(f, opt, hooks, - op->endids.ids, op->endids.count, - default_accept, - NULL)) + op->ret->ids, op->ret->count, + default_accept, &i)) + { + return -1; + } + + if (-1 == print_hook_comment(f, opt, hooks, + op->ret->ids, op->ret->count)) { return -1; } @@ -198,11 +238,27 @@ print_fetch(FILE *f, const struct fsm_options *opt) } } +static void +print_ret(FILE *f, const unsigned *ids, size_t count) +{ + size_t i; + + fprintf(f, "[]uint{"); + for (i = 0; i < count; i++) { + fprintf(f, "%u", ids[i]); + if (i + 1 < count) { + fprintf(f, ", "); + } + } + fprintf(f, "}"); +} + /* TODO: eventually to be non-static */ static int fsm_print_gofrag(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops, const char *cp) { @@ -210,6 +266,7 @@ fsm_print_gofrag(FILE *f, assert(f != NULL); assert(opt != NULL); + assert(retlist != NULL); assert(cp != NULL); /* TODO: we'll need to heed cp for e.g. 
lx's codegen */ @@ -272,12 +329,12 @@ fsm_print_gofrag(FILE *f, switch (op->instr) { case VM_OP_STOP: print_cond(f, op, opt); - print_end(f, op, opt, hooks, op->u.stop.end_bits); + print_end(f, op, opt, hooks, retlist, op->u.stop.end_bits); break; case VM_OP_FETCH: print_fetch(f, opt); - print_end(f, op, opt, hooks, op->u.fetch.end_bits); + print_end(f, op, opt, hooks, retlist, op->u.fetch.end_bits); break; case VM_OP_BRANCH: @@ -300,6 +357,7 @@ int fsm_print_go(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops) { const char *prefix; @@ -311,6 +369,7 @@ fsm_print_go(FILE *f, assert(f != NULL); assert(opt != NULL); assert(hooks != NULL); + assert(retlist != NULL); if (opt->prefix != NULL) { prefix = opt->prefix; @@ -325,13 +384,22 @@ fsm_print_go(FILE *f, } if (opt->fragment) { - if (-1 == fsm_print_gofrag(f, opt, hooks, ops, cp)) { + if (-1 == fsm_print_gofrag(f, opt, hooks, retlist, ops, cp)) { return -1; } } else { fprintf(f, "package %sfsm\n", package_prefix); fprintf(f, "\n"); + if (opt->ambig == AMBIG_MULTIPLE) { + for (size_t i = 0; i < retlist->count; i++) { + fprintf(f, "var ret%zu []uint = ", i); + print_ret(f, retlist->a[i].ids, retlist->a[i].count); + fprintf(f, "\n"); + } + fprintf(f, "\n"); + } + fprintf(f, "func %sMatch", prefix); switch (opt->io) { @@ -362,17 +430,16 @@ fsm_print_go(FILE *f, case AMBIG_NONE: fprintf(f, "bool"); break; - + case AMBIG_ERROR: case AMBIG_EARLIEST: fprintf(f, "(bool, uint)"); break; case AMBIG_MULTIPLE: - // TODO: fprintf(f, "(bool, uint[])"); - errno = ENOTSUP; - return -1; - + fprintf(f, "(bool, []uint)"); /* result type goes to f like the rest of the signature, not stdout */ + break; + default: assert(!"unreached"); abort(); @@ -380,7 +447,7 @@ fsm_print_go(FILE *f, fprintf(f, " {\n"); - if (-1 == fsm_print_gofrag(f, opt, hooks, ops, cp)) { + if (-1 == fsm_print_gofrag(f, opt, hooks, retlist, ops, cp)) { return -1; } diff --git a/src/libfsm/print/irdot.c b/src/libfsm/print/irdot.c index 
186365203..cff37ee86 100644 --- a/src/libfsm/print/irdot.c +++ b/src/libfsm/print/irdot.c @@ -215,8 +215,6 @@ print_state(FILE *f, fprintf(f, "\n"); } - /* TODO: leaf callback for dot output */ - /* showing hook in addition to existing content */ if (cs->isend && hooks->accept != NULL) { fprintf(f, "\t\t "); diff --git a/src/libfsm/print/llvm.c b/src/libfsm/print/llvm.c index ac87090df..ec931769c 100644 --- a/src/libfsm/print/llvm.c +++ b/src/libfsm/print/llvm.c @@ -28,32 +28,25 @@ #include "libfsm/internal.h" #include "libfsm/print.h" +#include "libfsm/vm/retlist.h" #include "libfsm/vm/vm.h" #define OPAQUE_POINTERS 1 -#ifdef OPAQUE_POINTERS // llvm >= 15 +#if OPAQUE_POINTERS // llvm >= 15 static const char *ptr_i8 = "ptr"; static const char *ptr_i32 = "ptr"; static const char *ptr_void = "ptr"; +static const char *ptr_rt = "ptr"; #else static const char *ptr_i8 = "i8*"; static const char *ptr_i32 = "i32*"; static const char *ptr_void = "i8*"; +static const char *ptr_rt = "%rt*"; #endif static const struct dfavm_op_ir fail; // used as a unqiue address only -struct ret { - size_t count; - const fsm_end_id_t *ids; -}; - -struct ret_list { - size_t count; - struct ret *a; -}; - /* * If we had a stack, the current set of live values would be a frame. * We're a DFA, so we don't have a stack. But I still think of them as a frame. 
@@ -100,19 +93,16 @@ default_accept(FILE *f, const struct fsm_options *opt, const fsm_end_id_t *ids, size_t count, void *lang_opaque, void *hook_opaque) { - size_t i; - assert(f != NULL); assert(opt != NULL); - assert(lang_opaque != NULL); + assert(lang_opaque == NULL); (void) hook_opaque; - - i = * (const size_t *) lang_opaque; + (void) lang_opaque; switch (opt->ambig) { case AMBIG_NONE: - fprintf(f, "[true, %%ret%zu],\n", i); + fprintf(f, "%%rt true"); break; case AMBIG_ERROR: @@ -122,7 +112,7 @@ default_accept(FILE *f, const struct fsm_options *opt, return -1; } - fprintf(f, "[{ true, %u }, %%ret%zu],\n", ids[0], i); + fprintf(f, "%%rt { i1 true, i32 %u }", ids[0]); break; case AMBIG_EARLIEST: @@ -130,13 +120,19 @@ default_accept(FILE *f, const struct fsm_options *opt, * The libfsm api guarentees these ids are unique, * and only appear once each, and are sorted. */ - fprintf(f, "[{ true, i32 %u }, %%ret%zu],\n", ids[0], i); + fprintf(f, "%%rt { i1 true, i32 %u }", ids[0]); break; case AMBIG_MULTIPLE: - // TODO: probably { i1, ptr_u8 } - assert(!"unimplemented"); - abort(); + fprintf(f, "internal unnamed_addr constant [%zu x i32] [", count); + for (size_t j = 0; j < count; j++) { + fprintf(f, "i32 %u", ids[j]); + if (j + 1 < count) { + fprintf(f, ", "); + } + } + fprintf(f, "]"); + break; default: assert(!"unreached"); @@ -159,13 +155,16 @@ default_reject(FILE *f, const struct fsm_options *opt, switch (opt->ambig) { case AMBIG_NONE: - fprintf(f, "[false, %%fail]\n"); + fprintf(f, "%%rt false"); break; case AMBIG_ERROR: case AMBIG_EARLIEST: + fprintf(f, "%%rt { i1 false, i32 poison }"); + break; + case AMBIG_MULTIPLE: - fprintf(f, "[{ false, undef }, %%fail]\n"); + fprintf(f, "%%rt { %s poison, i64 -1 }", ptr_i32); break; default: @@ -173,12 +172,18 @@ default_reject(FILE *f, const struct fsm_options *opt, abort(); } + if (opt->comments) { + fprintf(f, " ; fail"); + } + return 0; } static int -print_rettype(FILE *f, enum fsm_ambig ambig) +print_rettype(FILE *f, 
const char *name, enum fsm_ambig ambig) { + fprintf(f, "%s = type ", name); + switch (ambig) { case AMBIG_NONE: fprintf(f, "i1"); @@ -186,19 +191,22 @@ print_rettype(FILE *f, enum fsm_ambig ambig) case AMBIG_ERROR: case AMBIG_EARLIEST: - fprintf(f, "{ i1, u32 }"); + // success, id + fprintf(f, "{ i1, i32 }"); break; case AMBIG_MULTIPLE: - // TODO: probably { i1, ptr_u8 } - assert(!"unimplemented"); - abort(); + // ids, -1/count + fprintf(f, "{ %s, i64 }", ptr_i32); + break; default: assert(!"unreached"); abort(); } + fprintf(f, "\n"); + return 0; } @@ -267,8 +275,11 @@ print_cond(FILE *f, const struct fsm_options *opt, struct dfavm_op_ir *op, fprintf(f, "icmp %s i8 %%c%u, ", cmp_operator(op->cmp), use(&frame->c)); llvm_escputcharlit(f, opt, op->cmp_arg); - fprintf(f, " ; "); - c_escputcharlit(f, opt, op->cmp_arg); // C escaping for a comment + + if (opt->comments) { + fprintf(f, " ; "); + c_escputcharlit(f, opt, op->cmp_arg); // C escaping for a comment + } fprintf(f, "\n"); } @@ -328,7 +339,11 @@ print_fetch(FILE *f, const struct fsm_options *opt, ptr_i8); print_decl(f, "r", decl(&frame->r)); - fprintf(f, "icmp eq i32 %%i%u, -1 ; EOF\n", n); + fprintf(f, "icmp eq i32 %%i%u, -1", n); + if (opt->comments) { + fprintf(f, " ; EOF"); + } + fprintf(f, "\n"); // XXX: we don't distinguish error from eof // https://github.com/katef/libfsm/issues/484 @@ -358,8 +373,11 @@ print_fetch(FILE *f, const struct fsm_options *opt, ptr_i8, n); print_decl(f, "r", decl(&frame->r)); - fprintf(f, "icmp eq i8 %%c%u, 0 ; EOT\n", - n); + fprintf(f, "icmp eq i8 %%c%u, 0", n); + if (opt->comments) { + fprintf(f, " ; EOT"); + } + fprintf(f, "\n"); print_branch(f, frame, end_bits == VM_END_FAIL ? 
&fail : NULL, @@ -381,8 +399,11 @@ print_fetch(FILE *f, const struct fsm_options *opt, ptr_i8, n); print_decl(f, "r", decl(&frame->r)); - fprintf(f, "icmp eq %s %%p%u, %%e ; EOF\n", - ptr_i8, n); + fprintf(f, "icmp eq %s %%p%u, %%e", ptr_i8, n); + if (opt->comments) { + fprintf(f, " ; EOT"); + } + fprintf(f, "\n"); print_branch(f, frame, end_bits == VM_END_FAIL ? &fail : NULL, @@ -410,127 +431,22 @@ print_fetch(FILE *f, const struct fsm_options *opt, } } -static bool -append_ret(struct ret_list *list, - const fsm_end_id_t *ids, size_t count) -{ - const size_t low = 16; /* must be power of 2 */ - const size_t factor = 2; /* must be even */ - - assert(list != NULL); - - if (list->count == 0) { - list->a = malloc(low * sizeof *list->a); - if (list->a == NULL) { - return false; - } - } else if (list->count >= low && (list->count & (list->count - 1)) == 0) { - void *tmp; - size_t new = list->count * factor; - if (new < list->count) { - errno = E2BIG; - perror("realloc"); - exit(EXIT_FAILURE); - } - - tmp = realloc(list->a, new * sizeof *list->a); - if (tmp == NULL) { - return false; - } - - list->a = tmp; - } - - list->a[list->count].ids = ids; - list->a[list->count].count = count; - - list->count++; - - return true; -} - -static int -cmp_ret_by_endid(const void *pa, const void *pb) -{ - const struct ret *a = pa; - const struct ret *b = pb; - - if (a->count < b->count) { return -1; } - if (a->count > b->count) { return +1; } - - return memcmp(a->ids, b->ids, a->count * sizeof *a->ids); -} - -static struct ret * -find_ret(const struct ret_list *list, const struct dfavm_op_ir *op, - int (*cmp)(const void *pa, const void *pb)) -{ - struct ret key; - - assert(op != NULL); - assert(cmp != NULL); - - key.count = op->endids.count; - key.ids = op->endids.ids; - - return bsearch(&key, list->a, list->count, sizeof *list->a, cmp); -} - -static bool -build_retlist(struct ret_list *list, const struct dfavm_op_ir *a) -{ - const struct dfavm_op_ir *op; - - assert(list != NULL); - - 
for (op = a; op != NULL; op = op->next) { - switch (op->instr) { - case VM_OP_STOP: - if (op->u.stop.end_bits == VM_END_FAIL) { - /* %fail is special, don't add to retlist */ - continue; - } - - break; - - case VM_OP_FETCH: - if (op->u.fetch.end_bits == VM_END_FAIL) { - /* %fail is special, don't add to retlist */ - continue; - } - - break; - - case VM_OP_BRANCH: - continue; - - default: - assert(!"unreached"); - abort(); - } - - if (!append_ret(list, op->endids.ids, op->endids.count)) { - return false; - } - } - - return true; -} - /* TODO: eventually to be non-static */ static int fsm_print_llvmfrag(FILE *f, const struct fsm_options *opt, - const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops, - const char *cp) + const char *cp, + const char *prefix) { - struct ret_list retlist; struct dfavm_op_ir *op; assert(f != NULL); assert(opt != NULL); + assert(retlist != NULL); assert(cp != NULL); + assert(prefix != NULL); /* TODO: we'll need to heed cp for e.g. lx's codegen */ (void) cp; @@ -546,33 +462,6 @@ fsm_print_llvmfrag(FILE *f, } { - retlist.count = 0; - build_retlist(&retlist, ops); - - if (retlist.count > 0) { - size_t j = 0; - - /* sort for both dedup and bsearch */ - qsort(retlist.a, retlist.count, sizeof *retlist.a, cmp_ret_by_endid); - - /* deduplicate based on endids only. 
- * j is the start of a run; i increments until we find - * the start of the next run */ - for (size_t i = 1; i < retlist.count; i++) { - assert(i > j); - if (cmp_ret_by_endid(&retlist.a[j], &retlist.a[i]) == 0) { - continue; - } - - j++; - retlist.a[j] = retlist.a[i]; - } - - retlist.count = j + 1; - - assert(retlist.count > 0); - } - print_jump(f, ops); /* @@ -582,12 +471,14 @@ fsm_print_llvmfrag(FILE *f, * This looks like: * * stop: - * %ret = phi i1 - * [true, %ret0], ; "abc" - * [true, %ret1], ; "xyz" - * [true, %ret2], ; "abc", "xyz" - * [false, %fail] - * ret i1 %ret + * %i = phi i64 + * [0, %ret0], + * [1, %ret1], + * [2, %ret2], + * [3, %fail] + * %p = getelementptr inbounds [4 x %rt], [4 x %rt]* @fsm.r, i64 0, i64 %i + * %ret = load %rt, ptr %p + * ret %rt %ret * fail: * br label %stop * ret0: @@ -597,6 +488,8 @@ fsm_print_llvmfrag(FILE *f, * ret2: * br label %stop * + * where @fsm.r is [4 x %rt] and %rt is the return type. + * * And we jump to stop: via the ret*: labels rather than * to a phi node directly. 
This helps for two reasons: * @@ -612,36 +505,23 @@ fsm_print_llvmfrag(FILE *f, */ print_label(f, true, "stop"); - fprintf(f, "\t%%ret = phi "); - print_rettype(f, opt->ambig); - fprintf(f, "\n"); - - for (size_t i = 0; i < retlist.count; i++) { - fprintf(f, "\t "); - - if (-1 == print_hook_accept(f, opt, hooks, - retlist.a[i].ids, retlist.a[i].count, - default_accept, &i)) - { - return -1; - } + fprintf(f, "\t%%i = phi i64\n"); + for (size_t i = 0; i < retlist->count; i++) { + fprintf(f, "\t [%zu, %%ret%zu],\n", i, i); } + fprintf(f, "\t [%zu, %%fail]\n", retlist->count); - fprintf(f, "\t "); - if (-1 == print_hook_reject(f, opt, hooks, default_reject, NULL)) { - return -1; - } - - fprintf(f, "\tret "); - print_rettype(f, opt->ambig); - fprintf(f, " %%ret\n"); + fprintf(f, "\t%%p = getelementptr inbounds [%zu x %%rt], [%zu x %%rt]* @%sr, i64 0, i64 %%i\n", + retlist->count + 1, retlist->count + 1, prefix); + fprintf(f, "\t%%ret = load %%rt, %s %%p\n", ptr_rt); + fprintf(f, "\tret %%rt %%ret\n"); print_label(f, true, "fail"); fprintf(f, "\tbr "); print_label(f, false, "stop"); fprintf(f, "\n"); - for (size_t i = 0; i < retlist.count; i++) { + for (size_t i = 0; i < retlist->count; i++) { print_label(f, true, "ret%zu", i); fprintf(f, "\tbr "); print_label(f, false, "stop"); @@ -653,16 +533,22 @@ fsm_print_llvmfrag(FILE *f, for (op = ops; op != NULL; op = op->next) { if (op->instr != VM_OP_STOP || op->cmp != VM_CMP_ALWAYS || op->u.stop.end_bits != VM_END_FAIL) { print_label(f, true, "l%" PRIu32, op->index); - } - if (op->example != NULL) { - /* C's escaping seems to be a subset of llvm's, and these are - * for comments anyway. So I'm borrowing this for C here */ - fprintf(f, "\t; e.g. 
\""); - escputs(f, opt, c_escputc_str, op->example); - fprintf(f, "\""); + /* + * We only show examples when there's a label for the block, + * otherwise it's confusing with the conditionally elided + * optimisations per-instruction below, which can result in + * no block code being emitted for a particular vm op. + */ + if (op->example != NULL) { + /* C's escaping seems to be a subset of llvm's, and these are + * for comments anyway. So I'm borrowing this for C here */ + fprintf(f, "\t; e.g. \""); + escputs(f, opt, c_escputc_str, op->example); + fprintf(f, "\""); - fprintf(f, "\n"); + fprintf(f, "\n"); + } } switch (op->instr) { @@ -683,14 +569,12 @@ fsm_print_llvmfrag(FILE *f, if (op->u.stop.end_bits == VM_END_FAIL) { /* handled above */ } else { - assert(retlist.count > 0); - const struct ret *ret = find_ret(&retlist, op, cmp_ret_by_endid); + assert(retlist->count > 0); + const struct ret *ret = op->ret; assert(ret != NULL); - assert(ret >= retlist.a && ret <= (retlist.a + retlist.count)); - assert(ret->count == op->endids.count); - assert(0 == memcmp(ret->ids, op->endids.ids, ret->count)); + assert(ret >= retlist->a && ret <= (retlist->a + retlist->count)); fprintf(f, "\tbr "); - print_label(f, false, "ret%u", ret - retlist.a); + print_label(f, false, "ret%u", ret - retlist->a); fprintf(f, "\n"); } break; @@ -701,14 +585,12 @@ fsm_print_llvmfrag(FILE *f, if (op->u.fetch.end_bits == VM_END_FAIL) { /* handled in print_fetch() */ } else { - assert(retlist.count > 0); - const struct ret *ret = find_ret(&retlist, op, cmp_ret_by_endid); + assert(retlist->count > 0); + const struct ret *ret = op->ret; assert(ret != NULL); - assert(ret >= retlist.a && ret <= (retlist.a + retlist.count)); - assert(ret->count == op->endids.count); - assert(0 == memcmp(ret->ids, op->endids.ids, ret->count)); + assert(ret >= retlist->a && ret <= (retlist->a + retlist->count)); fprintf(f, "\tbr "); - print_label(f, false, "ret%u", ret - retlist.a); + print_label(f, false, "ret%u", ret - 
retlist->a); fprintf(f, "\n"); } break; @@ -734,10 +616,6 @@ fsm_print_llvmfrag(FILE *f, } } - if (retlist.count > 0) { - free(retlist.a); - } - return 0; } @@ -745,6 +623,7 @@ int fsm_print_llvm(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops) { const char *prefix; @@ -753,11 +632,12 @@ fsm_print_llvm(FILE *f, assert(f != NULL); assert(opt != NULL); assert(hooks != NULL); + assert(retlist != NULL); if (opt->prefix != NULL) { prefix = opt->prefix; } else { - prefix = "fsm_"; + prefix = "fsm."; } if (hooks->cp != NULL) { @@ -767,13 +647,71 @@ fsm_print_llvm(FILE *f, } if (opt->fragment) { - fsm_print_llvmfrag(f, opt, hooks, ops, cp); + fsm_print_llvmfrag(f, opt, retlist, ops, cp, prefix); return 0; } fprintf(f, "; generated\n"); -//XXX: type depends on ambig - fprintf(f, "define dso_local i1 @%smain", prefix); + print_rettype(f, "%rt", opt->ambig); + + /* + * For AMBIG_MULTIPLE we emit a bunch of arrays and then point at them from + * each %rt. So we call the hook for the arrays, because that's where the id + * list is. For other ambig modes, we call the hook for the %rt instead. 
+ */ + if (opt->ambig == AMBIG_MULTIPLE) { + for (size_t i = 0; i < retlist->count; i++) { + fprintf(f, "@%sr%zu = ", prefix, i); + if (-1 == print_hook_accept(f, opt, hooks, + retlist->a[i].ids, retlist->a[i].count, + default_accept, NULL)) + { + return -1; + } + + if (-1 == print_hook_comment(f, opt, hooks, + retlist->a[i].ids, retlist->a[i].count)) + { + return -1; + } + + fprintf(f, "\n"); + } + } + + fprintf(f, "@%sr = internal unnamed_addr constant [%zu x %%rt] [\n", prefix, retlist->count + 1); + for (size_t i = 0; i < retlist->count; i++) { + fprintf(f, "\t "); + if (opt->ambig == AMBIG_MULTIPLE) { + fprintf(f, "%%rt { %s bitcast ([%zu x i32]* @%sr%zu to %s), i64 %zu }", + ptr_i32, retlist->a[i].count, prefix, i, ptr_i32, retlist->a[i].count); + fprintf(f, ","); + } else { + if (-1 == print_hook_accept(f, opt, hooks, + retlist->a[i].ids, retlist->a[i].count, + default_accept, NULL)) + { + return -1; + } + + fprintf(f, ","); + + if (-1 == print_hook_comment(f, opt, hooks, + retlist->a[i].ids, retlist->a[i].count)) + { + return -1; + } + } + fprintf(f, "\n"); + } + fprintf(f, "\t "); + if (-1 == print_hook_reject(f, opt, hooks, default_reject, NULL)) { + return -1; + } + fprintf(f, "\n"); + fprintf(f, "\t]\n"); + + fprintf(f, "define dso_local %%rt @%smain", prefix); switch (opt->io) { case FSM_IO_GETC: @@ -824,7 +762,7 @@ fsm_print_llvm(FILE *f, exit(EXIT_FAILURE); } - fsm_print_llvmfrag(f, opt, hooks, ops, cp); + fsm_print_llvmfrag(f, opt, retlist, ops, cp, prefix); fprintf(f, "}\n"); fprintf(f, "\n"); diff --git a/src/libfsm/print/rust.c b/src/libfsm/print/rust.c index 5c35747d9..b11fb2c5a 100644 --- a/src/libfsm/print/rust.c +++ b/src/libfsm/print/rust.c @@ -26,6 +26,7 @@ #include "libfsm/internal.h" #include "libfsm/print.h" +#include "libfsm/vm/retlist.h" #include "libfsm/vm/vm.h" #define START UINT32_MAX @@ -50,7 +51,8 @@ cmp_operator(int cmp) static int print_ids(FILE *f, - enum fsm_ambig ambig, const fsm_end_id_t *ids, size_t count) + enum fsm_ambig 
ambig, const fsm_end_id_t *ids, size_t count, + size_t i) { switch (ambig) { case AMBIG_NONE: @@ -62,8 +64,8 @@ print_ids(FILE *f, if (count > 1) { errno = EINVAL; return -1; - } - + } + fprintf(f, "return Some(%u)", ids[0]); break; @@ -74,16 +76,16 @@ print_ids(FILE *f, */ fprintf(f, "return Some(%u)", ids[0]); break; - + case AMBIG_MULTIPLE: - assert(!"unimplemented"); - abort(); - + fprintf(f, "return Some(&RET%zu)", i); + break; + default: assert(!"unreached"); abort(); } - + return 0; } @@ -91,29 +93,32 @@ static int default_accept(FILE *f, const struct fsm_options *opt, const fsm_end_id_t *ids, size_t count, void *lang_opaque, void *hook_opaque) -{ +{ + size_t i; + assert(f != NULL); assert(opt != NULL); - assert(lang_opaque == NULL); - - (void) lang_opaque; + assert(lang_opaque != NULL); + (void) hook_opaque; - - if (-1 == print_ids(f, opt->ambig, ids, count)) { + + i = * (const size_t *) lang_opaque; + + if (-1 == print_ids(f, opt->ambig, ids, count, i)) { return -1; } return 0; } -static int +static int default_reject(FILE *f, const struct fsm_options *opt, void *lang_opaque, void *hook_opaque) { assert(f != NULL); assert(opt != NULL); assert(lang_opaque == NULL); - + (void) lang_opaque; (void) hook_opaque; @@ -162,17 +167,24 @@ static int print_end(FILE *f, const struct dfavm_op_ir *op, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, enum dfavm_op_end end_bits) { + size_t i; + switch (end_bits) { case VM_END_FAIL: return print_hook_reject(f, opt, hooks, default_reject, NULL); case VM_END_SUCC: + assert(op->ret >= retlist->a); + + i = op->ret - retlist->a; + return print_hook_accept(f, opt, hooks, - op->endids.ids, op->endids.count, + op->ret->ids, op->ret->count, default_accept, - NULL); + &i); default: assert(!"unreached"); @@ -200,11 +212,27 @@ print_fetch(FILE *f) fprintf(f, "bytes.next()"); } +static void +print_ret(FILE *f, const unsigned *ids, size_t count) +{ + size_t i; + + fprintf(f, "["); + for (i 
= 0; i < count; i++) { + fprintf(f, "%u", ids[i]); + if (i + 1 < count) { + fprintf(f, ", "); + } + } + fprintf(f, "];"); +} + /* TODO: eventually to be non-static */ static int fsm_print_rustfrag(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops, const char *cp) { @@ -213,11 +241,21 @@ fsm_print_rustfrag(FILE *f, assert(f != NULL); assert(opt != NULL); + assert(retlist != NULL); assert(cp != NULL); /* TODO: we'll need to heed cp for e.g. lx's codegen */ (void) cp; + if (opt->ambig == AMBIG_MULTIPLE) { + for (size_t i = 0; i < retlist->count; i++) { + fprintf(f, " static RET%zu: [u32; %zu] = ", i, retlist->a[i].count); + print_ret(f, retlist->a[i].ids, retlist->a[i].count); + fprintf(f, "\n"); + } + fprintf(f, "\n"); + } + /* * We only output labels for ops which are branched to. This gives * gaps in the sequence for ops which don't need a label. @@ -319,13 +357,21 @@ fsm_print_rustfrag(FILE *f, if (op->cmp != VM_CMP_ALWAYS) { fprintf(f, "{ "); } - if (-1 == print_end(f, op, opt, hooks, op->u.stop.end_bits)) { + if (-1 == print_end(f, op, opt, hooks, retlist, op->u.stop.end_bits)) { return -1; } if (op->cmp != VM_CMP_ALWAYS) { fprintf(f, " }"); } + if (op->u.stop.end_bits == VM_END_SUCC) { + if (-1 == print_hook_comment(f, opt, hooks, + op->ret->ids, op->ret->count)) + { + return -1; + } + } + if (op->cmp == VM_CMP_ALWAYS) { /* the code for fallthrough would be unreachable */ fallthrough = false; @@ -367,7 +413,7 @@ fsm_print_rustfrag(FILE *f, fprintf(f, " "); fprintf(f, "None => "); - print_end(f, op, opt, hooks, op->u.fetch.end_bits); + print_end(f, op, opt, hooks, retlist, op->u.fetch.end_bits); fprintf(f, ",\n"); fprintf(f, " "); @@ -415,6 +461,7 @@ int fsm_print_rust(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops) { const char *prefix; @@ -423,6 +470,7 @@ fsm_print_rust(FILE *f, assert(f != NULL); 
assert(opt != NULL); assert(hooks != NULL); + assert(retlist != NULL); if (opt->prefix != NULL) { prefix = opt->prefix; @@ -437,7 +485,7 @@ fsm_print_rust(FILE *f, } if (opt->fragment) { - fsm_print_rustfrag(f, opt, hooks, ops, cp); + fsm_print_rustfrag(f, opt, hooks, retlist, ops, cp); goto error; } @@ -448,22 +496,19 @@ fsm_print_rust(FILE *f, switch (opt->io) { case FSM_IO_GETC: /* e.g. dbg!(fsm_main("abc".as_bytes().iter().copied())); */ - fprintf(f, "(mut bytes: impl Iterator) -> Option<()> {\n"); - fprintf(f, " use Label::*;\n"); + fprintf(f, "(mut bytes: impl Iterator)"); break; case FSM_IO_STR: /* e.g. dbg!(fsm_main("xabces")); */ - fprintf(f, "(%sinput: &str) -> Option<()> {\n", + fprintf(f, "(%sinput: &str)", has_op(ops, VM_OP_FETCH) ? "" : "_"); - fprintf(f, " use Label::*;\n"); break; case FSM_IO_PAIR: /* e.g. dbg!(fsm_main("xabces".as_bytes())); */ - fprintf(f, "(%sinput: &[u8]) -> Option<()> {\n", + fprintf(f, "(%sinput: &[u8])", has_op(ops, VM_OP_FETCH) ? "" : "_"); - fprintf(f, " use Label::*;\n"); break; default: @@ -471,7 +516,27 @@ fsm_print_rust(FILE *f, exit(EXIT_FAILURE); } - fsm_print_rustfrag(f, opt, hooks, ops, cp); + fprintf(f, " -> "); + + switch (opt->ambig) { + case AMBIG_NONE: + case AMBIG_ERROR: + case AMBIG_EARLIEST: + fprintf(f, "Option<()>"); + break; + + case AMBIG_MULTIPLE: + fprintf(f, "Option<&'static [u32]>"); + break; + default: + fprintf(stderr, "unsupported ambig mode\n"); + exit(EXIT_FAILURE); + } + + fprintf(f, " {\n"); + fprintf(f, " use Label::*;\n"); + + fsm_print_rustfrag(f, opt, hooks, retlist, ops, cp); fprintf(f, "}\n"); fprintf(f, "\n"); diff --git a/src/libfsm/print/sh.c b/src/libfsm/print/sh.c index 05692df59..4b9c647c6 100644 --- a/src/libfsm/print/sh.c +++ b/src/libfsm/print/sh.c @@ -26,6 +26,7 @@ #include "libfsm/internal.h" #include "libfsm/print.h" +#include "libfsm/vm/retlist.h" #include "libfsm/vm/vm.h" static const char * @@ -204,10 +205,21 @@ print_end(FILE *f, const struct dfavm_op_ir *op, return 
print_hook_reject(f, opt, hooks, default_reject, NULL); case VM_END_SUCC: - return print_hook_accept(f, opt, hooks, - op->endids.ids, op->endids.count, + if (-1 == print_hook_accept(f, opt, hooks, + op->ret->ids, op->ret->count, default_accept, - NULL); + NULL)) + { + return -1; + } + + if (-1 == print_hook_comment(f, opt, hooks, + op->ret->ids, op->ret->count)) + { + return -1; + } + + return 0; default: assert(!"unreached"); @@ -233,6 +245,7 @@ int fsm_print_sh(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops) { struct dfavm_op_ir *op; @@ -240,6 +253,7 @@ fsm_print_sh(FILE *f, assert(f != NULL); assert(opt != NULL); assert(hooks != NULL); + assert(retlist != NULL); if (opt->io != FSM_IO_STR) { errno = ENOTSUP; diff --git a/src/libfsm/print/vmasm.c b/src/libfsm/print/vmasm.c index c5ac87492..dc9b76ee7 100644 --- a/src/libfsm/print/vmasm.c +++ b/src/libfsm/print/vmasm.c @@ -24,6 +24,7 @@ #include "libfsm/internal.h" #include "libfsm/print.h" +#include "libfsm/vm/retlist.h" #include "libfsm/vm/vm.h" enum asm_dialect { @@ -50,11 +51,16 @@ print_end(FILE *f, const struct dfavm_op_ir *op, case VM_END_SUCC: if (-1 == print_hook_accept(f, opt, hooks, - op->endids.ids, op->endids.count, + op->ret->ids, op->ret->count, NULL, NULL)) { return -1; } + if (-1 == print_hook_comment(f, opt, hooks, + op->ret->ids, op->ret->count)) + { + return -1; + } break; default: @@ -152,12 +158,16 @@ print_asm_amd64(FILE *f, switch (opt->io) { case FSM_IO_STR: - fprintf(f, "// func %s%s(data string) int\n", prefix, "Match"); + if (opt->comments) { + fprintf(f, "// func %s%s(data string) int\n", prefix, "Match"); + } fprintf(f, "TEXT ·%s(SB), NOSPLIT, $0-24\n", "Match"); break; case FSM_IO_PAIR: - fprintf(f, "// func %s%s(data []byte) int\n", prefix, "Match"); + if (opt->comments) { + fprintf(f, "// func %s%s(data []byte) int\n", prefix, "Match"); + } fprintf(f, "TEXT ·%s%s(SB), NOSPLIT, $0-32\n", prefix, 
"Match"); break; @@ -194,7 +204,9 @@ print_asm_amd64(FILE *f, for (op = ops; op != NULL; op = op->next) { if (op->num_incoming > 0) { fprintf(f, "%sl%u:\n", label_dot, op->index); - } else { + + // TODO: example + } else if (opt->comments) { fprintf(f, "%s l%u\n", comment, op->index); } @@ -209,7 +221,9 @@ print_asm_amd64(FILE *f, } if (op->cmp == VM_CMP_ALWAYS && op->next == NULL) { - fprintf(f, "\t%s elided jmp to %sfinish\n", comment, label_dot); + if (opt->comments) { + fprintf(f, "\t%s elided jmp to %sfinish\n", comment, label_dot); + } } else { const char *jmp_op; @@ -358,6 +372,7 @@ static int print_vmasm_encoding(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops, enum asm_dialect dialect) { @@ -366,6 +381,7 @@ print_vmasm_encoding(FILE *f, assert(f != NULL); assert(opt != NULL); assert(hooks != NULL); + assert(retlist != NULL); if (dialect == AMD64_GO) { if (opt->io != FSM_IO_STR && opt->io != FSM_IO_PAIR) { @@ -392,26 +408,29 @@ int fsm_print_amd64_att(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops) { - return print_vmasm_encoding(f, opt, hooks, ops, AMD64_ATT); + return print_vmasm_encoding(f, opt, hooks, retlist, ops, AMD64_ATT); } int fsm_print_amd64_nasm(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops) { - return print_vmasm_encoding(f, opt, hooks, ops, AMD64_NASM); + return print_vmasm_encoding(f, opt, hooks, retlist, ops, AMD64_NASM); } int fsm_print_amd64_go(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops) { - return print_vmasm_encoding(f, opt, hooks, ops, AMD64_GO); + return print_vmasm_encoding(f, opt, hooks, retlist, ops, AMD64_GO); } diff --git a/src/libfsm/print/vmc.c b/src/libfsm/print/vmc.c index e2dd15bad..c43073dca 100644 
--- a/src/libfsm/print/vmc.c +++ b/src/libfsm/print/vmc.c @@ -25,6 +25,7 @@ #include "libfsm/internal.h" #include "libfsm/print.h" +#include "libfsm/vm/retlist.h" #include "libfsm/vm/vm.h" static const char * @@ -62,16 +63,18 @@ print_ids(FILE *f, errno = EINVAL; return -1; } - - fprintf(f, "return %u;", ids[0]); - break; - + + /* fallthrough */ + case AMBIG_EARLIEST: /* * The libfsm api guarentees these ids are unique, * and only appear once each, and are sorted. */ - fprintf(f, "return %u;", ids[0]); + fprintf(f, "{\n"); + fprintf(f, "\t\t*id = %u;\n", ids[0]); + fprintf(f, "\t\treturn 1;\n"); + fprintf(f, "\t}"); break; case AMBIG_MULTIPLE: @@ -93,7 +96,7 @@ print_ids(FILE *f, fprintf(f, " };\n"); fprintf(f, "\t\t*ids = a;\n"); fprintf(f, "\t\t*count = %zu;\n", count); - fprintf(f, "\t\treturn 0;\n"); + fprintf(f, "\t\treturn 1;\n"); fprintf(f, "\t}"); break; @@ -175,10 +178,21 @@ print_end(FILE *f, const struct dfavm_op_ir *op, return print_hook_reject(f, opt, hooks, default_reject, NULL); case VM_END_SUCC: - return print_hook_accept(f, opt, hooks, - op->endids.ids, op->endids.count, + if (-1 == print_hook_accept(f, opt, hooks, + op->ret->ids, op->ret->count, default_accept, - NULL); + NULL)) + { + return -1; + } + + if (-1 == print_hook_comment(f, opt, hooks, + op->ret->ids, op->ret->count)) + { + return -1; + } + + return 0; default: assert(!"unreached"); @@ -360,6 +374,7 @@ static int fsm_print_cfrag(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops, const char *cp) { @@ -367,6 +382,7 @@ fsm_print_cfrag(FILE *f, assert(f != NULL); assert(opt != NULL); + assert(retlist != NULL); assert(cp != NULL); /* TODO: we'll need to heed cp for e.g. 
lx's codegen */ @@ -512,6 +528,7 @@ int fsm_print_vmc(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops) { const char *prefix; @@ -522,6 +539,7 @@ fsm_print_vmc(FILE *f, assert(f != NULL); assert(opt != NULL); assert(hooks != NULL); + assert(retlist != NULL); if (opt->prefix != NULL) { prefix = opt->prefix; @@ -530,7 +548,7 @@ fsm_print_vmc(FILE *f, } if (opt->fragment) { - if (-1 == fsm_print_cfrag(f, opt, hooks, ops, cp)) { + if (-1 == fsm_print_cfrag(f, opt, hooks, retlist, ops, cp)) { return -1; } } else { @@ -591,7 +609,7 @@ fsm_print_vmc(FILE *f, fprintf(f, ")\n"); fprintf(f, "{\n"); - if (-1 == fsm_print_cfrag(f, opt, hooks, ops, cp)) { + if (-1 == fsm_print_cfrag(f, opt, hooks, retlist, ops, cp)) { return -1; } diff --git a/src/libfsm/print/vmdot.c b/src/libfsm/print/vmdot.c index 576d8c08d..e01fc9011 100644 --- a/src/libfsm/print/vmdot.c +++ b/src/libfsm/print/vmdot.c @@ -25,6 +25,7 @@ #include "libfsm/internal.h" #include "libfsm/print.h" +#include "libfsm/vm/retlist.h" #include "libfsm/vm/vm.h" static const char * @@ -128,10 +129,17 @@ print_end(FILE *f, return print_hook_reject(f, opt, hooks, default_reject, NULL); case VM_END_SUCC: - return print_hook_accept(f, opt, hooks, - op->endids.ids, op->endids.count, + if (-1 == print_hook_accept(f, opt, hooks, + op->ret->ids, op->ret->count, default_accept, - NULL); + NULL)) + { + return -1; + } + + /* no print_hook_comment() for dot output */ + + return 0; default: assert(!"unreached"); @@ -267,9 +275,12 @@ fsm_print_edges(FILE *f, const struct fsm_options *opt, const struct dfavm_op_ir if (op->num_incoming > 0 || op == ops) { if (op != ops && can_fallthrough) { fprintf(f, "\t"); - fprintf(f, "S%lu:s -> S%" PRIu32 ":n [ style = bold ]; /* fallthrough */", + fprintf(f, "S%lu:s -> S%" PRIu32 ":n [ style = bold ];", block, op->index); + if (opt->comments) { + fprintf(f, " /* fallthrough */"); + } fprintf(f, "\n"); } @@ -305,11 +316,14 
@@ fsm_print_edges(FILE *f, const struct fsm_options *opt, const struct dfavm_op_ir } else { /* relative branch within the same block, entry on the east */ /* XXX: would like to make these edges shorter, but I don't know how */ - fprintf(f, "S%lu:b%" PRIu32 ":e -> S%lu:b%" PRIu32 ":e [ constraint = false ]; /* relative */", + fprintf(f, "S%lu:b%" PRIu32 ":e -> S%lu:b%" PRIu32 ":e [ constraint = false ];", block, op->index, block, op->u.br.dest_arg->index); + if (opt->comments) { + fprintf(f, " /* relative */"); + } } fprintf(f, "\n"); @@ -321,10 +335,12 @@ static int fsm_print_vmdotfrag(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops) { assert(f != NULL); assert(opt != NULL); + assert(retlist != NULL); if (-1 == fsm_print_nodes(f, opt, hooks, ops)) { return -1; @@ -340,14 +356,16 @@ int fsm_print_vmdot(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops) { assert(f != NULL); assert(opt != NULL); assert(hooks != NULL); + assert(retlist != NULL); if (opt->fragment) { - if (-1 == fsm_print_vmdotfrag(f, opt, hooks, ops)) { + if (-1 == fsm_print_vmdotfrag(f, opt, hooks, retlist, ops)) { return -1; } } else { @@ -365,7 +383,7 @@ fsm_print_vmdot(FILE *f, fprintf(f, "\tstart [ shape = none, label = \"\" ];\n"); fprintf(f, "\tstart -> S0:i0:w [ style = bold ];\n"); - if (-1 == fsm_print_vmdotfrag(f, opt, hooks, ops)) { + if (-1 == fsm_print_vmdotfrag(f, opt, hooks, retlist, ops)) { return -1; } diff --git a/src/libfsm/print/vmops.c b/src/libfsm/print/vmops.c index 7eff8d4e9..d95263062 100644 --- a/src/libfsm/print/vmops.c +++ b/src/libfsm/print/vmops.c @@ -25,14 +25,9 @@ #include "libfsm/internal.h" #include "libfsm/print.h" +#include "libfsm/vm/retlist.h" #include "libfsm/vm/vm.h" -enum vmops_dialect { - VMOPS_C, - VMOPS_H, - VMOPS_MAIN, -}; - static const char * cmp_operator(int cmp) { @@ -56,6 +51,7 @@ 
default_accept(FILE *f, const struct fsm_options *opt, void *lang_opaque, void *hook_opaque) { const char *prefix; + size_t i; assert(f != NULL); assert(opt != NULL); @@ -63,13 +59,21 @@ default_accept(FILE *f, const struct fsm_options *opt, (void) hook_opaque; - prefix = lang_opaque; + if (opt->prefix != NULL) { + prefix = opt->prefix; + } else { + prefix = "fsm_"; + } - // TODO: print ids (void) ids; (void) count; fprintf(f, "%sactionRET, 1", prefix); + if (opt->ambig != AMBIG_NONE) { + i = * (const size_t *) lang_opaque; + + fprintf(f, ", %zu", i); + } return 0; } @@ -82,13 +86,21 @@ default_reject(FILE *f, const struct fsm_options *opt, assert(f != NULL); assert(opt != NULL); - assert(lang_opaque != NULL); + assert(lang_opaque == NULL); + (void) lang_opaque; (void) hook_opaque; - prefix = lang_opaque; + if (opt->prefix != NULL) { + prefix = opt->prefix; + } else { + prefix = "fsm_"; + } fprintf(f, "%sactionRET, 0", prefix); + if (opt->ambig != AMBIG_NONE) { + fprintf(f, ", 0"); + } return 0; } @@ -96,7 +108,9 @@ default_reject(FILE *f, const struct fsm_options *opt, static int print_label(FILE *f, const struct dfavm_op_ir *op, const struct fsm_options *opt) { - fprintf(f, "\t\t/* l%" PRIu32 " */\n", op->index); + if (opt->comments) { + fprintf(f, "\t\t/* l%" PRIu32 " */\n", op->index); + } if (op->example != NULL) { fprintf(f, "\t\t/* e.g. 
\""); @@ -113,7 +127,7 @@ print_label(FILE *f, const struct dfavm_op_ir *op, const struct fsm_options *opt static int print_cond(FILE *f, const struct dfavm_op_ir *op, const struct fsm_options *opt, const char *prefix) { - fprintf(f, "\t\t{%s%s, ", prefix, cmp_operator(op->cmp)); + fprintf(f, "%s%s, ", prefix, cmp_operator(op->cmp)); if (-1 == c_escputcharlit(f, opt, op->cmp_arg)) { return -1; } @@ -126,23 +140,26 @@ static int print_end(FILE *f, const struct dfavm_op_ir *op, const struct fsm_options *opt, const struct fsm_hooks *hooks, - const char *prefix, + const struct ret_list *retlist, enum dfavm_op_end end_bits) { + size_t i; + switch (end_bits) { case VM_END_FAIL: - if (-1 == print_hook_reject(f, opt, hooks, default_reject, - (void *) prefix)) - { + if (-1 == print_hook_reject(f, opt, hooks, default_reject, NULL)) { return -1; } break; case VM_END_SUCC: + assert(op->ret >= retlist->a); + + i = op->ret - retlist->a; + if (-1 == print_hook_accept(f, opt, hooks, - op->endids.ids, op->endids.count, - default_accept, - (void *) prefix)) + op->ret->ids, op->ret->count, + default_accept, &i)) { return -1; } @@ -153,15 +170,16 @@ print_end(FILE *f, const struct dfavm_op_ir *op, abort(); } - fprintf(f, "},\n"); - return 0; } static int -print_branch(FILE *f, const struct dfavm_op_ir *op, const char *prefix) +print_branch(FILE *f, const struct fsm_options *opt, const struct dfavm_op_ir *op, const char *prefix) { - fprintf(f, "%sactionGOTO, %" PRIu32 "},\n", prefix, op->u.br.dest_arg->index); + fprintf(f, "%sactionGOTO, %" PRIu32, prefix, op->u.br.dest_arg->index); + if (opt->ambig != AMBIG_NONE) { + fprintf(f, ", 0"); + } return 0; } @@ -170,8 +188,9 @@ static int print_fetch(FILE *f, const struct fsm_options *opt, const char *prefix) { - fprintf(f, "\t\t{%sopEOF, 0, ", prefix); + fprintf(f, "%sopEOF, 0, ", prefix); switch (opt->io) { + case FSM_IO_GETC: case FSM_IO_STR: case FSM_IO_PAIR: break; @@ -182,18 +201,65 @@ print_fetch(FILE *f, const struct fsm_options 
*opt, const char *prefix) return 0; } -/* TODO: eventually to be non-static */ -static int -fsm_print_vmopsfrag(FILE *f, +static void +print_ret(FILE *f, const unsigned *ids, size_t count) +{ + size_t i; + + fprintf(f, "{ (const unsigned []) { "); + for (i = 0; i < count; i++) { + fprintf(f, "%u", ids[i]); + if (i + 1 < count) { + fprintf(f, ", "); + } + } + fprintf(f, " }, %zu }", count); +} + +int +fsm_print_vmops_c(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, - struct dfavm_op_ir *ops, - const char *prefix) + const struct ret_list *retlist, + struct dfavm_op_ir *ops) { - struct dfavm_op_ir *op; + const struct dfavm_op_ir *op; + const char *prefix; assert(f != NULL); assert(opt != NULL); + assert(hooks != NULL); + + if (opt->fragment) { + errno = ENOTSUP; + return -1; + } + + if (opt->prefix != NULL) { + prefix = opt->prefix; + } else { + prefix = "fsm_"; + } + + fprintf(f, "#include \n\n"); + fprintf(f, "#ifndef %sLIBFSM_VMOPS_H\n", prefix); + fprintf(f, "#include \"%svmops.h\"\n", prefix); + fprintf(f, "#endif /* %sLIBFSM_VMOPS_H */\n", prefix); + fprintf(f, "\n"); + + if (opt->ambig != AMBIG_NONE) { + fprintf(f, "struct %sret %sRet[] = {\n", prefix, prefix); + for (size_t i = 0; i < retlist->count; i++) { + fprintf(f, "\t"); + print_ret(f, retlist->a[i].ids, retlist->a[i].count); + fprintf(f, ",\n"); + } + fprintf(f, "};\n"); + fprintf(f, "const size_t %sRet_count = sizeof %sRet / sizeof *%sRet;\n", prefix, prefix, prefix); + fprintf(f, "\n"); + } + + fprintf(f, "struct %sop %sOps[] = {\n", prefix, prefix); for (op = ops; op != NULL; op = op->next) { if (op->num_incoming > 0) { @@ -201,12 +267,15 @@ fsm_print_vmopsfrag(FILE *f, return -1; } } + + fprintf(f, "\t{"); + switch (op->instr) { case VM_OP_STOP: if (-1 == print_cond(f, op, opt, prefix)) { return -1; } - if (-1 == print_end(f, op, opt, hooks, prefix, op->u.stop.end_bits)) { + if (-1 == print_end(f, op, opt, hooks, retlist, op->u.stop.end_bits)) { return -1; } break; @@ -215,7 
+284,7 @@ fsm_print_vmopsfrag(FILE *f, if (-1 == print_fetch(f, opt, prefix)) { return -1; } - if (-1 == print_end(f, op, opt, hooks, prefix, op->u.fetch.end_bits)) { + if (-1 == print_end(f, op, opt, hooks, retlist, op->u.fetch.end_bits)) { return -1; } break; @@ -224,7 +293,7 @@ fsm_print_vmopsfrag(FILE *f, if (-1 == print_cond(f, op, opt, prefix)) { return -1; } - if (-1 == print_branch(f, op, prefix)) { + if (-1 == print_branch(f, opt, op, prefix)) { return -1; } break; @@ -233,17 +302,31 @@ fsm_print_vmopsfrag(FILE *f, assert(!"unreached"); break; } + + fprintf(f, "},"); + + if (op->instr == VM_OP_STOP && op->u.stop.end_bits == VM_END_SUCC) { + if (-1 == print_hook_comment(f, opt, hooks, + op->ret->ids, op->ret->count)) + { + return -1; + } + } + + fprintf(f, "\n"); } + fprintf(f, "\t};\n"); + return 0; } int -fsm_print_vmops(FILE *f, +fsm_print_vmops_h(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, - struct dfavm_op_ir *ops, - enum vmops_dialect dialect) + const struct ret_list *retlist, + struct dfavm_op_ir *ops) { const char *prefix; @@ -251,204 +334,240 @@ fsm_print_vmops(FILE *f, assert(opt != NULL); assert(hooks != NULL); + (void) retlist; + (void) ops; + + if (opt->fragment) { + errno = ENOTSUP; + return -1; + } + if (opt->prefix != NULL) { prefix = opt->prefix; } else { prefix = "fsm_"; } - if (opt->fragment) { - if (dialect == VMOPS_C) { - if (-1 == fsm_print_vmopsfrag(f, opt, hooks, ops, prefix)) { - return -1; - } - } - } else { - switch (dialect) { - case VMOPS_C: - fprintf(f, "#include \n\n"); - fprintf(f, "#ifndef %sLIBFSM_VMOPS_H\n", prefix); - fprintf(f, "#include \"%svmops.h\"\n", prefix); - fprintf(f, "#endif /* %sLIBFSM_VMOPS_H */\n", prefix); - fprintf(f, "struct %sop %sOps[] = {\n", prefix, prefix); - if (-1 == fsm_print_vmopsfrag(f, opt, hooks, ops, prefix)) { - return -1; - } - fprintf(f, "\t};\n"); - break; - - case VMOPS_H: - fprintf(f, "#ifndef %sLIBFSM_VMOPS_H\n", prefix); - fprintf(f, "#define 
%sLIBFSM_VMOPS_H\n", prefix); - fprintf(f, "#include \n\n"); - fprintf(f, "enum %svmOp { %sopEOF, %sopLT, %sopLE, %sopEQ, %sopNE, %sopGE, %sopGT, %sopALWAYS};\n", - prefix, prefix, prefix, prefix, prefix, prefix, prefix, prefix, prefix); - fprintf(f, "enum %sactionOp { %sactionRET, %sactionGOTO };\n", prefix, prefix, prefix); - fprintf(f, "struct %sop { enum %svmOp op; unsigned char c; enum %sactionOp action; int32_t arg; };\n\n", - prefix, prefix, prefix); - fprintf(f, "#endif /* %sLIBFSM_VMOPS_H */\n", prefix); - break; - - case VMOPS_MAIN: - fprintf(f, "#include \n"); - fprintf(f, "#include \n"); - fprintf(f, "#include \n\n"); - fprintf(f, "#ifndef %sLIBFSM_VMOPS_H\n", prefix); - fprintf(f, "#include \"%svmops.h\"\n", prefix); - fprintf(f, "#endif /* %sLIBFSM_VMOPS_H */\n", prefix); - fprintf(f, "extern struct %sop %sOps[];\n", prefix, prefix); - fprintf(f, "\n"); - - switch (opt->io) { - case FSM_IO_PAIR: - fprintf(f, "int %smatch(const char *b, const char *e)\n", prefix); - break; - - case FSM_IO_STR: - fprintf(f, "int %smatch(const char *s)\n", prefix); - break; - - case FSM_IO_GETC: - errno = ENOTSUP; - return -1; - } - fprintf(f, "{\n"); - fprintf(f, "\tunsigned int i = 0;\n"); - fprintf(f, "\t/* The compiler doesn't know the op stream will have fetch before the first comparison. */\n"); - fprintf(f, "\t/* Initialize to zero to prevent maybe-uninitialized warning. 
*/\n"); - fprintf(f, "\tunsigned char c = 0;\n"); - fprintf(f, "\tint ok;\n"); - fprintf(f, "\tstruct %sop *ops = %sOps;\n", prefix, prefix); - - switch (opt->io) { - case FSM_IO_PAIR: - fprintf(f, "\tconst char *p = b;\n"); - break; - - case FSM_IO_STR: - fprintf(f, "\tconst char *p = s;\n"); - break; - - case FSM_IO_GETC: - errno = ENOTSUP; - return -1; - } - - fprintf(f, "\n"); - fprintf(f, "\tfor (;;) {\n"); - fprintf(f, "\t\tok = 0;\n"); - fprintf(f, "\t\tswitch (ops[i].op) {\n"); - fprintf(f, "\t\tcase %sopEOF:\n", prefix); - - switch (opt->io) { - case FSM_IO_PAIR: - fprintf(f, "\t\t\tif (p < e) {\n"); - fprintf(f, "\t\t\t\t/* not at EOF */\n"); - fprintf(f, "\t\t\t\tc = *p++;\n"); - fprintf(f, "\t\t\t\ti++;\n"); - fprintf(f, "\t\t\t\tcontinue;\n"); - fprintf(f, "\t\t\t}\n"); - break; - - case FSM_IO_STR: - fprintf(f, "\t\t\tc = *p++;\n"); - fprintf(f, "\t\t\tif (c != '\\0') {\n"); - fprintf(f, "\t\t\t\t/* not at EOF */\n"); - fprintf(f, "\t\t\t\ti++;\n"); - fprintf(f, "\t\t\t\tcontinue;\n"); - fprintf(f, "\t\t\t}\n"); - break; - - case FSM_IO_GETC: - errno = ENOTSUP; - return -1; - } - - fprintf(f, "\t\t\tok = 1;\n"); - fprintf(f, "\t\t\tbreak;\n"); - fprintf(f, "\t\tcase %sopLT: ok = c < ops[i].c; break;\n", prefix); - fprintf(f, "\t\tcase %sopLE: ok = c <= ops[i].c; break;\n", prefix); - fprintf(f, "\t\tcase %sopEQ: ok = c == ops[i].c; break;\n", prefix); - fprintf(f, "\t\tcase %sopNE: ok = c != ops[i].c; break;\n", prefix); - fprintf(f, "\t\tcase %sopGE: ok = c >= ops[i].c; break;\n", prefix); - fprintf(f, "\t\tcase %sopGT: ok = c > ops[i].c; break;\n", prefix); - fprintf(f, "\t\tcase %sopALWAYS: ok = 1; break;\n", prefix); - fprintf(f, "\t\t}\n"); - fprintf(f, "\t\tif (ok) {\n"); - fprintf(f, "\t\t\tif (ops[i].action == %sactionRET) {\n", prefix); - fprintf(f, "\t\t\t\treturn (int) (ops[i].arg);\n"); - fprintf(f, "\t\t\t}\n"); - fprintf(f, "\t\t\ti = ops[i].arg;\n"); - fprintf(f, "\t\t\tcontinue;\n"); - fprintf(f, "\t\t}\n"); - fprintf(f, "\t\ti++;\n"); 
- fprintf(f, "\t}\n"); - fprintf(f, "}\n"); - fprintf(f, "\n"); - fprintf(f, "#define %sBUFFER_SIZE (1024)\n", prefix); - fprintf(f, "\n"); - fprintf(f, "int main(void)\n"); - fprintf(f, "{\n"); - fprintf(f, "\tchar *buf, *p;\n"); - fprintf(f, "\tint r;\n"); - fprintf(f, "\n"); - fprintf(f, "\tbuf = malloc(%sBUFFER_SIZE);\n", prefix); - fprintf(f, "\tif (!buf) {\n"); - fprintf(f, "\t\tperror(\"malloc\");\n"); - fprintf(f, "\t\texit(1);\n"); - fprintf(f, "\t}\n\n"); - fprintf(f, "\tfor (;;) {\n"); - fprintf(f, "\t\tp = fgets(buf, %sBUFFER_SIZE, stdin);\n", prefix); - fprintf(f, "\t\tif (!p) {\n"); - fprintf(f, "\t\t\tbreak;\n"); - fprintf(f, "\t\t}\n"); - - switch (opt->io) { - case FSM_IO_PAIR: - fprintf(f, "\t\tr = %smatch(p, p + strlen(p));\n", prefix); - break; - case FSM_IO_STR: - fprintf(f, "\t\tr = %smatch(p);\n", prefix); - break; - case FSM_IO_GETC: - errno = ENOTSUP; - return -1; - } - fprintf(f, "\t\tprintf(\"%%smatch\\n\", r ? \"no \" : \"\");\n"); - fprintf(f, "\t}\n"); - fprintf(f, "\treturn 0;\n"); - fprintf(f, "}\n"); - break; - } + fprintf(f, "#ifndef %sLIBFSM_VMOPS_H\n", prefix); + fprintf(f, "#define %sLIBFSM_VMOPS_H\n", prefix); + fprintf(f, "#include \n\n"); + fprintf(f, "#include \n\n"); + fprintf(f, "enum %svmOp { %sopEOF, %sopLT, %sopLE, %sopEQ, %sopNE, %sopGE, %sopGT, %sopALWAYS};\n", + prefix, prefix, prefix, prefix, prefix, prefix, prefix, prefix, prefix); + fprintf(f, "enum %sactionOp { %sactionRET, %sactionGOTO };\n", prefix, prefix, prefix); + if (opt->ambig != AMBIG_NONE) { + fprintf(f, "struct %sret { const unsigned *ids; size_t count; };\n\n", prefix); + } + fprintf(f, "struct %sop { enum %svmOp op; unsigned char c; enum %sactionOp action; int32_t arg; int32_t ret; };\n\n", + prefix, prefix, prefix); + if (opt->ambig != AMBIG_NONE) { + fprintf(f, "extern struct %sret %sRet[];\n", prefix, prefix); + fprintf(f, "extern const size_t %sRet_count;\n", prefix); } + fprintf(f, "#endif /* %sLIBFSM_VMOPS_H */\n", prefix); return 0; } int 
-fsm_print_vmops_c(FILE *f, +fsm_print_vmops_main(FILE *f, const struct fsm_options *opt, const struct fsm_hooks *hooks, + const struct ret_list *retlist, struct dfavm_op_ir *ops) { - return fsm_print_vmops(f, opt, hooks, ops, VMOPS_C); -} + const char *prefix; -int -fsm_print_vmops_h(FILE *f, - const struct fsm_options *opt, - const struct fsm_hooks *hooks, - struct dfavm_op_ir *ops) -{ - return fsm_print_vmops(f, opt, hooks, ops, VMOPS_H); -} + assert(f != NULL); + assert(opt != NULL); + assert(hooks != NULL); -int -fsm_print_vmops_main(FILE *f, - const struct fsm_options *opt, - const struct fsm_hooks *hooks, - struct dfavm_op_ir *ops) -{ - return fsm_print_vmops(f, opt, hooks, ops, VMOPS_MAIN); + (void) retlist; + (void) ops; + + if (opt->fragment) { + errno = ENOTSUP; + return -1; + } + + if (opt->prefix != NULL) { + prefix = opt->prefix; + } else { + prefix = "fsm_"; + } + + fprintf(f, "#include \n"); + fprintf(f, "#include \n"); + fprintf(f, "#include \n\n"); + fprintf(f, "#ifndef %sLIBFSM_VMOPS_H\n", prefix); + fprintf(f, "#include \"%svmops.h\"\n", prefix); + fprintf(f, "#endif /* %sLIBFSM_VMOPS_H */\n", prefix); + fprintf(f, "extern struct %sop %sOps[];\n", prefix, prefix); + if (opt->ambig != AMBIG_NONE) { + fprintf(f, "extern struct %sret %sRet[];\n", prefix, prefix); + fprintf(f, "extern const size_t %sRet_count;\n", prefix); + } + fprintf(f, "\n"); + + fprintf(f, "int %smatch(", prefix); + switch (opt->io) { + case FSM_IO_PAIR: + fprintf(f, "const char *b, const char *e"); + break; + + case FSM_IO_STR: + fprintf(f, "const char *s"); + break; + + case FSM_IO_GETC: + errno = ENOTSUP; + return -1; + } + if (opt->ambig != AMBIG_NONE) { + fprintf(f, ",\n"); + fprintf(f, "\tconst unsigned **ids, size_t *count"); + } + fprintf(f, ")\n"); + + fprintf(f, "{\n"); + fprintf(f, "\tunsigned int i = 0;\n"); + if (opt->comments) { + fprintf(f, "\t/* The compiler doesn't know the op stream will have fetch before the first comparison. 
*/\n"); + fprintf(f, "\t/* Initialize to zero to prevent maybe-uninitialized warning. */\n"); + } + fprintf(f, "\tunsigned char c = 0;\n"); + fprintf(f, "\tint ok;\n"); + fprintf(f, "\tstruct %sop *ops = %sOps;\n", prefix, prefix); + + switch (opt->io) { + case FSM_IO_PAIR: + fprintf(f, "\tconst char *p = b;\n"); + break; + + case FSM_IO_STR: + fprintf(f, "\tconst char *p = s;\n"); + break; + + case FSM_IO_GETC: + errno = ENOTSUP; + return -1; + } + + fprintf(f, "\n"); + fprintf(f, "\tfor (;;) {\n"); + fprintf(f, "\t\tok = 0;\n"); + fprintf(f, "\t\tswitch (ops[i].op) {\n"); + fprintf(f, "\t\tcase %sopEOF:\n", prefix); + + switch (opt->io) { + case FSM_IO_PAIR: + fprintf(f, "\t\t\tif (p < e) {\n"); + if (opt->comments) { + fprintf(f, "\t\t\t\t/* not at EOF */\n"); + } + fprintf(f, "\t\t\t\tc = *p++;\n"); + fprintf(f, "\t\t\t\ti++;\n"); + fprintf(f, "\t\t\t\tcontinue;\n"); + fprintf(f, "\t\t\t}\n"); + break; + + case FSM_IO_STR: + fprintf(f, "\t\t\tc = *p++;\n"); + fprintf(f, "\t\t\tif (c != '\\0') {\n"); + if (opt->comments) { + fprintf(f, "\t\t\t\t/* not at EOF */\n"); + } + fprintf(f, "\t\t\t\ti++;\n"); + fprintf(f, "\t\t\t\tcontinue;\n"); + fprintf(f, "\t\t\t}\n"); + break; + + case FSM_IO_GETC: + errno = ENOTSUP; + return -1; + } + + fprintf(f, "\t\t\tok = 1;\n"); + fprintf(f, "\t\t\tbreak;\n"); + fprintf(f, "\t\tcase %sopLT: ok = c < ops[i].c; break;\n", prefix); + fprintf(f, "\t\tcase %sopLE: ok = c <= ops[i].c; break;\n", prefix); + fprintf(f, "\t\tcase %sopEQ: ok = c == ops[i].c; break;\n", prefix); + fprintf(f, "\t\tcase %sopNE: ok = c != ops[i].c; break;\n", prefix); + fprintf(f, "\t\tcase %sopGE: ok = c >= ops[i].c; break;\n", prefix); + fprintf(f, "\t\tcase %sopGT: ok = c > ops[i].c; break;\n", prefix); + fprintf(f, "\t\tcase %sopALWAYS: ok = 1; break;\n", prefix); + fprintf(f, "\t\t}\n"); + fprintf(f, "\t\tif (ok) {\n"); + fprintf(f, "\t\t\tif (ops[i].action == %sactionRET) {\n", prefix); + if (opt->ambig != AMBIG_NONE) { + fprintf(f, "\t\t\t\tif 
(ops[i].arg) {\n"); + fprintf(f, "\t\t\t\t\t*ids = %sRet[ops[i].ret].ids;\n", prefix); + fprintf(f, "\t\t\t\t\t*count = %sRet[ops[i].ret].count;\n", prefix); + fprintf(f, "\t\t\t\t}\n"); + } + fprintf(f, "\t\t\t\treturn (int) (ops[i].arg);\n"); + fprintf(f, "\t\t\t}\n"); + fprintf(f, "\t\t\ti = ops[i].arg;\n"); + fprintf(f, "\t\t\tcontinue;\n"); + fprintf(f, "\t\t}\n"); + fprintf(f, "\t\ti++;\n"); + fprintf(f, "\t}\n"); + fprintf(f, "}\n"); + fprintf(f, "\n"); + fprintf(f, "#define %sBUFFER_SIZE (1024)\n", prefix); + fprintf(f, "\n"); + fprintf(f, "int main(void)\n"); + fprintf(f, "{\n"); + fprintf(f, "\tchar *buf, *p;\n"); + if (opt->ambig != AMBIG_NONE) { + fprintf(f, "\tconst unsigned *ids;\n"); + fprintf(f, "\tsize_t count, i;\n"); + } + fprintf(f, "\tint r;\n"); + fprintf(f, "\n"); + fprintf(f, "\tbuf = malloc(%sBUFFER_SIZE);\n", prefix); + fprintf(f, "\tif (!buf) {\n"); + fprintf(f, "\t\tperror(\"malloc\");\n"); + fprintf(f, "\t\texit(1);\n"); + fprintf(f, "\t}\n\n"); + fprintf(f, "\tfor (;;) {\n"); + fprintf(f, "\t\tp = fgets(buf, %sBUFFER_SIZE, stdin);\n", prefix); + fprintf(f, "\t\tif (!p) {\n"); + fprintf(f, "\t\t\tbreak;\n"); + fprintf(f, "\t\t}\n"); + + fprintf(f, "\t\tr = %smatch(", prefix); + switch (opt->io) { + case FSM_IO_PAIR: + fprintf(f, "p, p + strlen(p)"); + break; + case FSM_IO_STR: + fprintf(f, "p"); + break; + case FSM_IO_GETC: + // TODO: getc from string buffer + errno = ENOTSUP; + return -1; + } + if (opt->ambig != AMBIG_NONE) { + fprintf(f, ", &ids, &count"); + } + fprintf(f, ");\n"); + + fprintf(f, "\t\tprintf(\"%%smatch\\n\", r == 0 ? 
\"no \" : \"\");\n"); + if (opt->ambig != AMBIG_NONE) { + fprintf(f, "\t\tif (r) {\n"); + fprintf(f, "\t\t\tprintf(\"ids:\");\n"); + fprintf(f, "\t\t\tfor (i = 0; i < count; i++) {\n"); + fprintf(f, "\t\t\t\tprintf(\" %%u\", ids[i]);\n"); + fprintf(f, "\t\t\t}\n"); + fprintf(f, "\t\t\tprintf(\"\\n\");\n"); + fprintf(f, "\t\t}\n"); + } + fprintf(f, "\t}\n"); + fprintf(f, "\treturn 0;\n"); + fprintf(f, "}\n"); + + return 0; } diff --git a/src/libfsm/vm.c b/src/libfsm/vm.c index c9205815f..0eb4fa16c 100644 --- a/src/libfsm/vm.c +++ b/src/libfsm/vm.c @@ -18,6 +18,7 @@ #include "internal.h" #include "vm/vm.h" +#include "vm/retlist.h" #include "print/ir.h" // VM state: @@ -92,6 +93,7 @@ fsm_vm_compile_with_options(const struct fsm *fsm, static const struct dfavm_assembler_ir zero; struct dfavm_assembler_ir a; struct ir *ir; + struct ret_list retlist; struct fsm_dfavm *vm; assert(fsm != NULL); @@ -102,9 +104,15 @@ fsm_vm_compile_with_options(const struct fsm *fsm, return NULL; } + if (!build_retlist(&retlist, ir)) { + free_ir(fsm, ir); + return NULL; + } + a = zero; - if (!dfavm_compile_ir(&a, ir, vmopts)) { + if (!dfavm_compile_ir(&a, ir, &retlist, vmopts)) { + free_retlist(&retlist); free_ir(fsm, ir); return NULL; } @@ -116,6 +124,7 @@ fsm_vm_compile_with_options(const struct fsm *fsm, return NULL; } + free_retlist(&retlist); dfavm_opasm_finalize_op(&a); return vm; diff --git a/src/libfsm/vm/Makefile b/src/libfsm/vm/Makefile index fe54811b8..ce32cc585 100644 --- a/src/libfsm/vm/Makefile +++ b/src/libfsm/vm/Makefile @@ -4,6 +4,7 @@ SRC += src/libfsm/vm/ir.c SRC += src/libfsm/vm/vm.c SRC += src/libfsm/vm/v1.c SRC += src/libfsm/vm/v2.c +SRC += src/libfsm/vm/retlist.c .for src in ${SRC:Msrc/libfsm/vm/*.c} CFLAGS.${src} += -I src # XXX: for internal.h diff --git a/src/libfsm/vm/ir.c b/src/libfsm/vm/ir.c index f27df508d..608871b61 100644 --- a/src/libfsm/vm/ir.c +++ b/src/libfsm/vm/ir.c @@ -18,6 +18,7 @@ #include "libfsm/internal.h" +#include "retlist.h" #include "vm.h" 
#include "print/ir.h" @@ -277,7 +278,8 @@ opasm_free_list(struct dfavm_assembler_ir *a, struct dfavm_op_ir *op) } static struct dfavm_op_ir * -opasm_new(struct dfavm_assembler_ir *a, enum dfavm_op_instr instr, enum dfavm_op_cmp cmp, unsigned char arg, +opasm_new(struct dfavm_assembler_ir *a, const struct ret_list *retlist, + enum dfavm_op_instr instr, enum dfavm_op_cmp cmp, unsigned char arg, const struct ir_state *ir_state) { static const struct dfavm_op_ir zero; @@ -306,20 +308,22 @@ opasm_new(struct dfavm_assembler_ir *a, enum dfavm_op_instr instr, enum dfavm_op if (ir_state != NULL) { op->example = ir_state->example; - op->endids.ids = ir_state->endids.ids; - op->endids.count = ir_state->endids.count; + op->ret = ir_state->isend + ? find_ret(retlist, ir_state->endids.ids, ir_state->endids.count) + : NULL; } return op; } static struct dfavm_op_ir * -opasm_new_fetch(struct dfavm_assembler_ir *a, unsigned state, enum dfavm_op_end end, +opasm_new_fetch(struct dfavm_assembler_ir *a, const struct ret_list *retlist, + unsigned state, enum dfavm_op_end end, const struct ir_state *ir_state) { struct dfavm_op_ir *op; - op = opasm_new(a, VM_OP_FETCH, VM_CMP_ALWAYS, 0, ir_state); + op = opasm_new(a, retlist, VM_OP_FETCH, VM_CMP_ALWAYS, 0, ir_state); if (op == NULL) { return NULL; } @@ -331,12 +335,13 @@ opasm_new_fetch(struct dfavm_assembler_ir *a, unsigned state, enum dfavm_op_end } static struct dfavm_op_ir * -opasm_new_stop(struct dfavm_assembler_ir *a, enum dfavm_op_cmp cmp, unsigned char arg, enum dfavm_op_end end, +opasm_new_stop(struct dfavm_assembler_ir *a, const struct ret_list *retlist, + enum dfavm_op_cmp cmp, unsigned char arg, enum dfavm_op_end end, const struct ir_state *ir_state) { struct dfavm_op_ir *op; - op = opasm_new(a, VM_OP_STOP, cmp, arg, ir_state); + op = opasm_new(a, retlist, VM_OP_STOP, cmp, arg, ir_state); if (op == NULL) { return NULL; } @@ -347,14 +352,15 @@ opasm_new_stop(struct dfavm_assembler_ir *a, enum dfavm_op_cmp cmp, unsigned cha } 
static struct dfavm_op_ir * -opasm_new_branch(struct dfavm_assembler_ir *a, enum dfavm_op_cmp cmp, unsigned char arg, uint32_t dest_state, +opasm_new_branch(struct dfavm_assembler_ir *a, const struct ret_list *retlist, + enum dfavm_op_cmp cmp, unsigned char arg, uint32_t dest_state, const struct ir_state *ir_state) { struct dfavm_op_ir *op; assert(dest_state < a->nstates); - op = opasm_new(a, VM_OP_BRANCH, cmp, arg, ir_state); + op = opasm_new(a, retlist, VM_OP_BRANCH, cmp, arg, ir_state); if (op == NULL) { return NULL; } @@ -488,7 +494,8 @@ analyze_table(struct dfa_table *table) } static int -xlate_table_ranges(struct dfavm_assembler_ir *a, struct dfa_table *table, struct dfavm_op_ir **opp) +xlate_table_ranges(struct dfavm_assembler_ir *a, const struct ret_list *retlist, + struct dfa_table *table, struct dfavm_op_ir **opp) { int i,lo; int count = 0; @@ -508,8 +515,8 @@ xlate_table_ranges(struct dfavm_assembler_ir *a, struct dfa_table *table, struct enum dfavm_op_cmp cmp = (i > lo+1) ? VM_CMP_LE : VM_CMP_EQ; op = (dst < 0) - ? opasm_new_stop(a, cmp, arg, VM_END_FAIL, table->ir_state) - : opasm_new_branch(a, cmp, arg, dst, table->ir_state); + ? opasm_new_stop(a, retlist, cmp, arg, VM_END_FAIL, table->ir_state) + : opasm_new_branch(a, retlist, cmp, arg, dst, table->ir_state); if (op == NULL) { return -1; @@ -526,8 +533,8 @@ xlate_table_ranges(struct dfavm_assembler_ir *a, struct dfa_table *table, struct if (lo < FSM_SIGMA_COUNT) { int64_t dst = table->tbl[lo]; *opp = (dst < 0) - ? opasm_new_stop(a, VM_CMP_ALWAYS, 0, VM_END_FAIL, table->ir_state) - : opasm_new_branch(a, VM_CMP_ALWAYS, 0, dst, table->ir_state); + ? 
opasm_new_stop(a, retlist, VM_CMP_ALWAYS, 0, VM_END_FAIL, table->ir_state) + : opasm_new_branch(a, retlist, VM_CMP_ALWAYS, 0, dst, table->ir_state); if (*opp == NULL) { return -1; } @@ -539,7 +546,8 @@ xlate_table_ranges(struct dfavm_assembler_ir *a, struct dfa_table *table, struct } static int -xlate_table_cases(struct dfavm_assembler_ir *a, struct dfa_table *table, struct dfavm_op_ir **opp) +xlate_table_cases(struct dfavm_assembler_ir *a, const struct ret_list *retlist, + struct dfa_table *table, struct dfavm_op_ir **opp) { int i, count = 0; int64_t mdst = table->mode.to; @@ -554,8 +562,8 @@ xlate_table_cases(struct dfavm_assembler_ir *a, struct dfa_table *table, struct } *opp = (dst < 0) - ? opasm_new_stop(a, VM_CMP_EQ, i, VM_END_FAIL, table->ir_state) - : opasm_new_branch(a, VM_CMP_EQ, i, dst, table->ir_state); + ? opasm_new_stop(a, retlist, VM_CMP_EQ, i, VM_END_FAIL, table->ir_state) + : opasm_new_branch(a, retlist, VM_CMP_EQ, i, dst, table->ir_state); if (*opp == NULL) { return -1; } @@ -564,8 +572,8 @@ xlate_table_cases(struct dfavm_assembler_ir *a, struct dfa_table *table, struct } *opp = (mdst < 0) - ? opasm_new_stop(a, VM_CMP_ALWAYS, 0, VM_END_FAIL, table->ir_state) - : opasm_new_branch(a, VM_CMP_ALWAYS, 0, mdst, table->ir_state); + ? 
opasm_new_stop(a, retlist, VM_CMP_ALWAYS, 0, VM_END_FAIL, table->ir_state) + : opasm_new_branch(a, retlist, VM_CMP_ALWAYS, 0, mdst, table->ir_state); if (*opp == NULL) { return -1; } @@ -576,7 +584,8 @@ xlate_table_cases(struct dfavm_assembler_ir *a, struct dfa_table *table, struct } static int -initial_translate_table(struct dfavm_assembler_ir *a, struct dfa_table *table, struct dfavm_op_ir **opp) +initial_translate_table(struct dfavm_assembler_ir *a, const struct ret_list *retlist, + struct dfa_table *table, struct dfavm_op_ir **opp) { int count, best_count; struct dfavm_op_ir *op, *best_op; @@ -604,13 +613,13 @@ initial_translate_table(struct dfavm_assembler_ir *a, struct dfa_table *table, s assert(dst >= 0); assert((size_t)dst < a->nstates); - *opp = opasm_new_stop(a, VM_CMP_NE, sym, VM_END_FAIL, table->ir_state); + *opp = opasm_new_stop(a, retlist, VM_CMP_NE, sym, VM_END_FAIL, table->ir_state); if (*opp == NULL) { return -1; } opp = &(*opp)->next; - *opp = opasm_new_branch(a, VM_CMP_ALWAYS, 0, dst, table->ir_state); + *opp = opasm_new_branch(a, retlist, VM_CMP_ALWAYS, 0, dst, table->ir_state); if (*opp == NULL) { return -1; } @@ -620,10 +629,10 @@ initial_translate_table(struct dfavm_assembler_ir *a, struct dfa_table *table, s } best_op = NULL; - best_count = xlate_table_ranges(a, table, &best_op); + best_count = xlate_table_ranges(a, retlist, table, &best_op); op = NULL; - count = xlate_table_cases(a, table, &op); + count = xlate_table_cases(a, retlist, table, &op); if (count < best_count) { opasm_free_list(a,best_op); @@ -682,7 +691,8 @@ dfa_table_init(struct dfa_table *table, long default_dest, const struct ir_state } static int -initial_translate_partial(struct dfavm_assembler_ir *a, struct ir_state *st, struct dfavm_op_ir **opp) +initial_translate_partial(struct dfavm_assembler_ir *a, const struct ret_list *retlist, + struct ir_state *st, struct dfavm_op_ir **opp) { struct dfa_table table; size_t i, ngrps; @@ -696,11 +706,12 @@ 
initial_translate_partial(struct dfavm_assembler_ir *a, struct ir_state *st, str group_to_table(&table, &st->u.partial.groups[i]); } - return initial_translate_table(a, &table, opp); + return initial_translate_table(a, retlist, &table, opp); } static int -initial_translate_dominant(struct dfavm_assembler_ir *a, struct ir_state *st, struct dfavm_op_ir **opp) +initial_translate_dominant(struct dfavm_assembler_ir *a, const struct ret_list *retlist, + struct ir_state *st, struct dfavm_op_ir **opp) { struct dfa_table table; size_t i, ngrps; @@ -714,11 +725,12 @@ initial_translate_dominant(struct dfavm_assembler_ir *a, struct ir_state *st, st group_to_table(&table, &st->u.dominant.groups[i]); } - return initial_translate_table(a, &table, opp); + return initial_translate_table(a, retlist, &table, opp); } static int -initial_translate_error(struct dfavm_assembler_ir *a, struct ir_state *st, struct dfavm_op_ir **opp) +initial_translate_error(struct dfavm_assembler_ir *a, const struct ret_list *retlist, + struct ir_state *st, struct dfavm_op_ir **opp) { struct dfa_table table; size_t i, ngrps; @@ -734,11 +746,13 @@ initial_translate_error(struct dfavm_assembler_ir *a, struct ir_state *st, struc group_to_table(&table, &st->u.error.groups[i]); } - return initial_translate_table(a, &table, opp); + return initial_translate_table(a, retlist, &table, opp); } static struct dfavm_op_ir * -initial_translate_state(struct dfavm_assembler_ir *a, const struct ir *ir, size_t ind) +initial_translate_state(struct dfavm_assembler_ir *a, const struct ir *ir, + const struct ret_list *retlist, + size_t ind) { struct ir_state *st; struct dfavm_op_ir **opp; @@ -747,22 +761,22 @@ initial_translate_state(struct dfavm_assembler_ir *a, const struct ir *ir, size_ opp = &a->ops[ind]; if (st->isend && st->strategy == IR_SAME && st->u.same.to == ind) { - *opp = opasm_new_stop(a, VM_CMP_ALWAYS, 0, VM_END_SUCC, st); + *opp = opasm_new_stop(a, retlist, VM_CMP_ALWAYS, 0, VM_END_SUCC, st); return a->ops[ind]; 
} - *opp = opasm_new_fetch(a, ind, (st->isend) ? VM_END_SUCC : VM_END_FAIL, st); + *opp = opasm_new_fetch(a, retlist, ind, (st->isend) ? VM_END_SUCC : VM_END_FAIL, st); opp = &(*opp)->next; assert(*opp == NULL); switch (st->strategy) { case IR_NONE: - *opp = opasm_new_stop(a, VM_CMP_ALWAYS, 0, VM_END_FAIL, st); + *opp = opasm_new_stop(a, retlist, VM_CMP_ALWAYS, 0, VM_END_FAIL, st); opp = &(*opp)->next; break; case IR_SAME: - *opp = opasm_new_branch(a, VM_CMP_ALWAYS, 0, st->u.same.to, st); + *opp = opasm_new_branch(a, retlist, VM_CMP_ALWAYS, 0, st->u.same.to, st); opp = &(*opp)->next; break; @@ -774,19 +788,19 @@ initial_translate_state(struct dfavm_assembler_ir *a, const struct ir *ir, size_ * intelligently. */ case IR_PARTIAL: - if (initial_translate_partial(a, st, opp) < 0) { + if (initial_translate_partial(a, retlist, st, opp) < 0) { return NULL; } break; case IR_DOMINANT: - if (initial_translate_dominant(a, st, opp) < 0) { + if (initial_translate_dominant(a, retlist, st, opp) < 0) { return NULL; } break; case IR_ERROR: - if (initial_translate_error(a, st, opp) < 0) { + if (initial_translate_error(a, retlist, st, opp) < 0) { return NULL; } break; @@ -804,20 +818,6 @@ initial_translate_state(struct dfavm_assembler_ir *a, const struct ir *ir, size_ return a->ops[ind]; } -static int -initial_translate(const struct ir *ir, struct dfavm_assembler_ir *a) -{ - size_t i,n; - - n = a->nstates; - - for (i=0; i < n; i++) { - a->ops[i] = initial_translate_state(a, ir, i); - } - - return 0; -} - static void fixup_dests(struct dfavm_assembler_ir *a) { @@ -1063,8 +1063,15 @@ print_all_states(struct dfavm_assembler_ir *a) } int -dfavm_compile_ir(struct dfavm_assembler_ir *a, const struct ir *ir, struct fsm_vm_compile_opts opts) +dfavm_compile_ir(struct dfavm_assembler_ir *a, const struct ir *ir, const struct ret_list *retlist, + struct fsm_vm_compile_opts opts) { + size_t i; + + assert(a != NULL); + assert(ir != NULL); + assert(retlist != NULL); + a->nstates = ir->n; a->start = 
ir->start; @@ -1080,8 +1087,8 @@ dfavm_compile_ir(struct dfavm_assembler_ir *a, const struct ir *ir, struct fsm_v return 0; } - if (initial_translate(ir, a) < 0) { - return 0; + for (i=0; i < a->nstates; i++) { + a->ops[i] = initial_translate_state(a, ir, retlist, i); } fixup_dests(a); @@ -1121,7 +1128,7 @@ dfavm_compile_ir(struct dfavm_assembler_ir *a, const struct ir *ir, struct fsm_v return 0; } - a->ops[0] = opasm_new_stop(a, VM_CMP_ALWAYS, 0, VM_END_FAIL, NULL); + a->ops[0] = opasm_new_stop(a, retlist, VM_CMP_ALWAYS, 0, VM_END_FAIL, NULL); if (a->ops[0] == NULL) { return -1; } diff --git a/src/libfsm/vm/retlist.c b/src/libfsm/vm/retlist.c new file mode 100644 index 000000000..97d4eb3f0 --- /dev/null +++ b/src/libfsm/vm/retlist.c @@ -0,0 +1,147 @@ +/* + * Copyright 2008-2024 Katherine Flavel + * + * See LICENCE for the full copyright terms. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "libfsm/internal.h" + +#include "libfsm/print/ir.h" +#include "libfsm/vm/retlist.h" + +static bool +append_ret(struct ret_list *list, + const fsm_end_id_t *ids, size_t count) +{ + const size_t low = 16; /* must be power of 2 */ + const size_t factor = 2; /* must be even */ + + assert(list != NULL); + + // TODO: alloc callbacks + if (list->count == 0) { + list->a = malloc(low * sizeof *list->a); + if (list->a == NULL) { + return false; + } + } else if (list->count >= low && (list->count & (list->count - 1)) == 0) { + void *tmp; + size_t new = list->count * factor; + if (new < list->count) { + errno = E2BIG; + perror("realloc"); + exit(EXIT_FAILURE); + } + + tmp = realloc(list->a, new * sizeof *list->a); + if (tmp == NULL) { + return false; + } + + list->a = tmp; + } + + list->a[list->count].ids = ids; + list->a[list->count].count = count; + + list->count++; + + return true; +} + +static int +cmp_ret(const void *pa, const void *pb) +{ + const struct ret *a = pa; + const struct ret *b = pb; + + if (a->count < b->count) { return 
-1; } + if (a->count > b->count) { return +1; } + + if (a->count == 0) { + return 0; + } + + assert(a->ids != NULL); + assert(b->ids != NULL); + + return memcmp(a->ids, b->ids, a->count * sizeof *a->ids); +} + +struct ret * +find_ret(const struct ret_list *list, + const fsm_end_id_t *ids, size_t count) +{ + struct ret key; + + key.count = count; + key.ids = ids; + + return bsearch(&key, list->a, list->count, sizeof *list->a, cmp_ret); +} + +bool +build_retlist(struct ret_list *list, const struct ir *ir) +{ + size_t i; + + assert(list != NULL); + assert(ir != NULL); + + list->count = 0; + + for (i = 0; i < ir->n; i++) { + if (!ir->states[i].isend) { + continue; + } + + if (!append_ret(list, ir->states[i].endids.ids, ir->states[i].endids.count)) { + return false; + } + } + + if (list->count > 0) { + size_t j = 0; + + /* sort for both dedup and bsearch */ + qsort(list->a, list->count, sizeof *list->a, cmp_ret); + + /* deduplicate based on endids only. + * j is the start of a run; i increments until we find + * the start of the next run */ + for (size_t i = 1; i < list->count; i++) { + assert(i > j); + if (cmp_ret(&list->a[j], &list->a[i]) == 0) { + continue; + } + + j++; + list->a[j] = list->a[i]; + } + + list->count = j + 1; + + assert(list->count > 0); + } + + return true; +} + +void +free_retlist(struct ret_list *list) +{ + if (list->count > 0) { + free(list->a); + } +} + diff --git a/src/libfsm/vm/retlist.h b/src/libfsm/vm/retlist.h new file mode 100644 index 000000000..5e67218b9 --- /dev/null +++ b/src/libfsm/vm/retlist.h @@ -0,0 +1,32 @@ +/* + * Copyright 2024 Katherine Flavel + * + * See LICENCE for the full copyright terms. 
+ */ + +#ifndef FSM_INTERNAL_RETLIST_H +#define FSM_INTERNAL_RETLIST_H + +struct ir; + +struct ret { + size_t count; + const fsm_end_id_t *ids; +}; + +struct ret_list { + size_t count; + struct ret *a; +}; + +struct ret * +find_ret(const struct ret_list *list, const fsm_end_id_t *ids, size_t count); + +bool +build_retlist(struct ret_list *list, const struct ir *ir); + +void +free_retlist(struct ret_list *list); + +#endif + diff --git a/src/libfsm/vm/vm.h b/src/libfsm/vm/vm.h index bfb28a774..c96bfa84b 100644 --- a/src/libfsm/vm/vm.h +++ b/src/libfsm/vm/vm.h @@ -20,6 +20,8 @@ #define DFAVM_MAGIC "DFAVM$" struct ir; +struct ret; +struct ret_list; struct fsm_vm_compile_opts; struct dfavm_op_ir_pool; @@ -74,15 +76,8 @@ struct dfavm_op_ir { */ uint32_t index; - - -const char *example; - -struct { - fsm_end_id_t *ids; /* NULL -> 0 */ - size_t count; -} endids; - + const char *example; + const struct ret *ret; uint32_t num_incoming; // number of branches to this instruction int in_trace; @@ -198,7 +193,7 @@ const char * cmp_name(int cmp); int -dfavm_compile_ir(struct dfavm_assembler_ir *a, const struct ir *ir, struct fsm_vm_compile_opts opts); +dfavm_compile_ir(struct dfavm_assembler_ir *a, const struct ir *ir, const struct ret_list *retlist, struct fsm_vm_compile_opts opts); struct fsm_dfavm * dfavm_compile_vm(const struct dfavm_assembler_ir *a, struct fsm_vm_compile_opts opts); diff --git a/src/re/main.c b/src/re/main.c index 86258f318..53abe2d52 100644 --- a/src/re/main.c +++ b/src/re/main.c @@ -334,30 +334,19 @@ conflict(FILE *f, const struct fsm_options *opt, } static int -accept_c(FILE *f, const struct fsm_options *opt, +comment_c(FILE *f, const struct fsm_options *opt, const fsm_end_id_t *ids, size_t count, - void *lang_opaque, void *hook_opaque) + void *hook_opaque) { - unsigned n; size_t i; assert(opt != NULL); - assert(lang_opaque == NULL); assert(hook_opaque == NULL); (void) opt; - (void) lang_opaque; (void) hook_opaque; - n = 0; - - for (i = 0; i < count; i++) 
{ - n |= 1U << ids[i]; - } - - fprintf(f, "return %#x;", (unsigned) n); - - fprintf(f, " /* "); + fprintf(f, "/* "); for (i = 0; i < count; i++) { assert(ids[i] < matchc); @@ -375,30 +364,19 @@ accept_c(FILE *f, const struct fsm_options *opt, } static int -accept_rust(FILE *f, const struct fsm_options *opt, +comment_rust(FILE *f, const struct fsm_options *opt, const fsm_end_id_t *ids, size_t count, - void *lang_opaque, void *hook_opaque) + void *hook_opaque) { - unsigned n; size_t i; assert(opt != NULL); - assert(lang_opaque == NULL); assert(hook_opaque == NULL); (void) opt; - (void) lang_opaque; (void) hook_opaque; - n = 0; - - for (i = 0; i < count; i++) { - n |= 1U << ids[i]; - } - - fprintf(f, "return Some(%#x)", (unsigned) n); - - fprintf(f, " /* "); + fprintf(f, "// "); for (i = 0; i < count; i++) { assert(ids[i] < matchc); @@ -410,37 +388,23 @@ accept_rust(FILE *f, const struct fsm_options *opt, } } - fprintf(f, " */"); - return 0; } static int -accept_llvm(FILE *f, const struct fsm_options *opt, +comment_llvm(FILE *f, const struct fsm_options *opt, const fsm_end_id_t *ids, size_t count, - void *lang_opaque, void *hook_opaque) + void *hook_opaque) { - unsigned n; size_t i; assert(opt != NULL); - assert(lang_opaque != NULL); assert(hook_opaque == NULL); (void) opt; (void) hook_opaque; - n = 0; - - for (i = 0; i < count; i++) { - n |= 1U << ids[i]; - } - - i = * (const size_t *) lang_opaque; - - fprintf(f, "[u%#x, %%ret%zu],", (unsigned) n, i); - - fprintf(f, " ; "); + fprintf(f, "; "); for (i = 0; i < count; i++) { assert(ids[i] < matchc); @@ -452,98 +416,35 @@ accept_llvm(FILE *f, const struct fsm_options *opt, } } - fprintf(f, "\n"); - return 0; } static int -accept_dot(FILE *f, const struct fsm_options *opt, +comment_dot(FILE *f, const struct fsm_options *opt, const fsm_end_id_t *ids, size_t count, - void *lang_opaque, void *hook_opaque) -{ - fsm_state_t s; - size_t i; - - assert(opt != NULL); - assert(lang_opaque != NULL); - assert(hook_opaque == NULL); - 
- (void) opt; - (void) hook_opaque; - - s = * (fsm_state_t *) lang_opaque; - - fprintf(f, "label = <"); - - if (!opt->anonymous_states) { - fprintf(f, "%u", s); - - if (count > 0) { - fprintf(f, "
"); - } - } - - for (i = 0; i < count; i++) { - assert(ids[i] < matchc); - - fprintf(f, "#%u", ids[i]); - - if (i + 1 < count) { - fprintf(f, ","); - } - } - - fprintf(f, ">"); - - /* TODO: centralise to libfsm/print/dot.c */ - if (opt->comments) { - fprintf(f, " /* "); - - for (i = 0; i < count; i++) { - assert(ids[i] < matchc); - - fprintf(f, "\"%s\"", matchv[ids[i]]); /* XXX: escape string (and comment) */ - - if (i + 1 < count) { - fprintf(f, ", "); - } - } - - fprintf(f, " */"); - } - - return 0; -} - -static int -accept_json(FILE *f, const struct fsm_options *opt, - const fsm_end_id_t *ids, size_t count, - void *lang_opaque, void *hook_opaque) + void *hook_opaque) { size_t i; assert(opt != NULL); - assert(lang_opaque == NULL); assert(hook_opaque == NULL); (void) opt; - (void) lang_opaque; (void) hook_opaque; - fprintf(f, "[ "); + fprintf(f, "/* "); for (i = 0; i < count; i++) { assert(ids[i] < matchc); - fprintf(f, "%u", ids[i]); + fprintf(f, "\"%s\"", matchv[ids[i]]); /* XXX: escape string (and comment) */ if (i + 1 < count) { fprintf(f, ", "); } } - fprintf(f, " ]"); + fprintf(f, " */"); return 0; } @@ -1146,33 +1047,30 @@ main(int argc, char *argv[]) } if (fsm_lang != FSM_PRINT_NONE) { - /* TODO: print examples in comments for end states; - * patterns in comments for the whole FSM */ - switch (fsm_lang) { case FSM_PRINT_NONE: break; case FSM_PRINT_C: case FSM_PRINT_VMC: - hooks.accept = accept_c; + hooks.comment = comment_c; break; case FSM_PRINT_RUST: - hooks.accept = accept_rust; + case FSM_PRINT_GO: /* close enough */ + hooks.comment = comment_rust; break; case FSM_PRINT_LLVM: - hooks.accept = accept_llvm; + hooks.comment = comment_llvm; break; case FSM_PRINT_DOT: case FSM_PRINT_VMDOT: - hooks.accept = patterns ? accept_dot : NULL; + hooks.comment = patterns ? comment_dot : NULL; break; case FSM_PRINT_JSON: - hooks.accept = patterns ? 
accept_json : NULL; break; default: diff --git a/src/retest/runner.c b/src/retest/runner.c index ea5a54372..0dac9ab38 100644 --- a/src/retest/runner.c +++ b/src/retest/runner.c @@ -379,7 +379,7 @@ runner_init_compiled(struct fsm *fsm, case IMPL_LLVM: r->u.impl_llvm.h = h; - r->u.impl_llvm.func = (bool (*)(const char *, const char *)) (uintptr_t) dlsym(h, "fsm_main"); + r->u.impl_llvm.func = (bool (*)(const char *, const char *)) (uintptr_t) dlsym(h, "fsm.main"); break; case IMPL_GO: