diff --git a/include/fsm/print.h b/include/fsm/print.h
index 453d3d915..ed9699e07 100644
--- a/include/fsm/print.h
+++ b/include/fsm/print.h
@@ -72,6 +72,10 @@ struct fsm_hooks {
int (*reject)(FILE *, const struct fsm_options *opt,
void *lang_opaque, void *hook_opaque);
+ int (*comment)(FILE *, const struct fsm_options *opt,
+ const fsm_end_id_t *ids, size_t count,
+ void *hook_opaque);
+
/* only called for AMBIG_ERROR; see opt.ambig */
int (*conflict)(FILE *, const struct fsm_options *opt,
const fsm_end_id_t *ids, size_t count,
@@ -83,7 +87,17 @@ struct fsm_hooks {
/*
* Print an FSM to the given file stream. The output is written in the format
- * specified.
+ * specified by the lang enum.
+ *
+ * Not all languages support all options, and fsm_print will ENOTSUP where
+ * these are not possible. This is different to when an option is possible
+ * but simply not yet implemented, where fsm_print() will print a message
+ * to stderr and exit.
+ *
+ * The code generation for the typical case of matching input requires the FSM
+ * to be a DFA, and will EINVAL if the FSM is not a DFA. As opposed to e.g.
+ * FSM_PRINT_API, which generates code for other purposes, and does not place
+ * particular expectations on the FSM.
*
* The output options may be NULL, indicating to use defaults.
*
diff --git a/src/libfsm/print.c b/src/libfsm/print.c
index 6ff34e4ae..f8522c678 100644
--- a/src/libfsm/print.c
+++ b/src/libfsm/print.c
@@ -18,6 +18,7 @@
#include "print.h"
#include "internal.h"
+#include "vm/retlist.h"
#include "vm/vm.h"
#include "print/ir.h"
@@ -81,6 +82,31 @@ print_hook_accept(FILE *f,
return 0;
}
+int
+print_hook_comment(FILE *f,
+ const struct fsm_options *opt,
+ const struct fsm_hooks *hooks,
+ const fsm_end_id_t *ids, size_t count)
+{
+ assert(f != NULL);
+ assert(opt != NULL);
+ assert(hooks != NULL);
+
+ if (opt->ambig == AMBIG_ERROR) {
+ assert(count <= 1); /* at most one id is expected in this mode */
+ }
+
+ if (opt->comments && hooks->comment != NULL) {
+ /* separator before the hook's output; a "polyglot" in that the same space is emitted whatever the output language */
+ fprintf(f, " ");
+
+ return hooks->comment(f, opt, ids, count, /* the hook's own result is passed back to the caller */
+ hooks->hook_opaque);
+ }
+
+ return 0; /* nothing printed: comments are disabled or no comment hook is set */
+}
+
int
print_hook_reject(FILE *f,
const struct fsm_options *opt,
@@ -179,6 +205,7 @@ print_conflicts(FILE *f, const struct fsm *fsm,
assert(res == 1);
// TODO: de-duplicate by ids[], so we don't call the conflict hook an unneccessary number of times
+ // TODO: now i think this is the same as calling once per retlist entry
/*
* The conflict hook is called here (rather in the caller),
@@ -336,20 +363,37 @@ fsm_print(FILE *f, const struct fsm *fsm,
goto done;
}
+ /*
+ * We're building the retlist here based on the ir.
+ * I think we could build the retlist earlier instead,
+ * and then point at the struct ret entries from the ir,
+ * and then dfavm_compile_ir() would pick those up from there.
+ * But for now this is good.
+ */
+ struct ret_list retlist;
+
+ if (!build_retlist(&retlist, ir)) {
+ free_ir(fsm, ir);
+ goto error;
+ }
+
a = zero;
/* TODO: non-const a */
- if (!dfavm_compile_ir(&a, ir, vm_opts)) {
+ if (!dfavm_compile_ir(&a, ir, &retlist, vm_opts)) {
+ free_retlist(&retlist);
free_ir(fsm, ir);
return -1;
}
if (print_vm != NULL) {
- r = print_vm(f, opt, hooks, a.linked);
+ r = print_vm(f, opt, hooks, &retlist, a.linked);
}
dfavm_opasm_finalize_op(&a);
+ free_retlist(&retlist);
+
done:
if (ir != NULL) {
diff --git a/src/libfsm/print.h b/src/libfsm/print.h
index c45fcec53..6fea83274 100644
--- a/src/libfsm/print.h
+++ b/src/libfsm/print.h
@@ -12,6 +12,7 @@ struct fsm_options;
struct fsm_hooks;
struct ir;
struct dfavm_op_ir;
+struct ret_list;
int
print_hook_args(FILE *f,
@@ -31,6 +32,12 @@ print_hook_accept(FILE *f,
void *lang_opaque, void *hook_opaque),
void *lang_opaque);
+int
+print_hook_comment(FILE *f,
+ const struct fsm_options *opt,
+ const struct fsm_hooks *hooks,
+ const fsm_end_id_t *ids, size_t count);
+
int
print_hook_reject(FILE *f,
const struct fsm_options *opt,
@@ -59,6 +66,7 @@ typedef int ir_print_f(FILE *f,
typedef int vm_print_f(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops);
vm_print_f fsm_print_amd64_att;
diff --git a/src/libfsm/print/api.c b/src/libfsm/print/api.c
index d7cb70f1f..43b8cb232 100644
--- a/src/libfsm/print/api.c
+++ b/src/libfsm/print/api.c
@@ -217,7 +217,7 @@ fsm_print_api(FILE *f,
} else {
fprintf(f, "\tfor (i = 0x%02x; i <= 0x%02x; i++) {",
(unsigned int) lo, (unsigned int) hi - 1);
- if (rangeclass(lo, hi - 1)) {
+ if (opt->comments && rangeclass(lo, hi - 1)) {
fprintf(f, " /* '%c' .. '%c' */", (unsigned char) lo, (unsigned char) hi - 1);
}
fprintf(f, "\n");
diff --git a/src/libfsm/print/awk.c b/src/libfsm/print/awk.c
index de98f95eb..fad52c3ee 100644
--- a/src/libfsm/print/awk.c
+++ b/src/libfsm/print/awk.c
@@ -26,6 +26,7 @@
#include "libfsm/internal.h"
#include "libfsm/print.h"
+#include "libfsm/vm/retlist.h"
#include "libfsm/vm/vm.h"
#define START UINT32_MAX
@@ -156,10 +157,21 @@ print_end(FILE *f, const struct dfavm_op_ir *op,
return print_hook_reject(f, opt, hooks, default_reject, NULL);
case VM_END_SUCC:
- return print_hook_accept(f, opt, hooks,
- op->endids.ids, op->endids.count,
+ if (-1 == print_hook_accept(f, opt, hooks,
+ op->ret->ids, op->ret->count,
default_accept,
- NULL);
+ NULL))
+ {
+ return -1;
+ }
+
+ if (-1 == print_hook_comment(f, opt, hooks,
+ op->ret->ids, op->ret->count))
+ {
+ return -1;
+ }
+
+ return 0;
default:
assert(!"unreached");
@@ -186,6 +198,7 @@ static int
fsm_print_awkfrag(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops,
const char *cp, const char *prefix)
{
@@ -194,6 +207,7 @@ fsm_print_awkfrag(FILE *f,
assert(f != NULL);
assert(opt != NULL);
assert(hooks != NULL);
+ assert(retlist != NULL);
assert(cp != NULL);
assert(prefix != NULL);
@@ -289,6 +303,7 @@ int
fsm_print_awk(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops)
{
const char *prefix;
@@ -297,6 +312,7 @@ fsm_print_awk(FILE *f,
assert(f != NULL);
assert(opt != NULL);
assert(hooks != NULL);
+ assert(retlist != NULL);
if (opt->prefix != NULL) {
prefix = opt->prefix;
@@ -311,7 +327,7 @@ fsm_print_awk(FILE *f,
}
if (opt->fragment) {
- if (-1 == fsm_print_awkfrag(f, opt, hooks, ops, cp, prefix)) {
+ if (-1 == fsm_print_awkfrag(f, opt, hooks, retlist, ops, cp, prefix)) {
return -1;
}
} else {
@@ -333,7 +349,7 @@ fsm_print_awk(FILE *f,
fprintf(f, ", l, c) {\n");
- if (-1 == fsm_print_awkfrag(f, opt, hooks, ops, cp, prefix)) {
+ if (-1 == fsm_print_awkfrag(f, opt, hooks, retlist, ops, cp, prefix)) {
return -1;
}
diff --git a/src/libfsm/print/c.c b/src/libfsm/print/c.c
index 30a176199..bcea6ab5b 100644
--- a/src/libfsm/print/c.c
+++ b/src/libfsm/print/c.c
@@ -71,15 +71,17 @@ print_ids(FILE *f,
return -1;
}
- fprintf(f, "return %u;", ids[0]);
- break;
+ /* fallthrough */
case AMBIG_EARLIEST:
/*
* The libfsm api guarentees these ids are unique,
* and only appear once each, and are sorted.
*/
- fprintf(f, "return %u;", ids[0]);
+ fprintf(f, "{\n");
+ fprintf(f, "\t\t*id = %u;\n", ids[0]);
+ fprintf(f, "\t\treturn 1;\n");
+ fprintf(f, "\t}");
break;
case AMBIG_MULTIPLE:
@@ -101,7 +103,7 @@ print_ids(FILE *f,
fprintf(f, " };\n");
fprintf(f, "\t\t*ids = a;\n");
fprintf(f, "\t\t*count = %zu;\n", count);
- fprintf(f, "\t\treturn 0;\n");
+ fprintf(f, "\t\treturn 1;\n");
fprintf(f, "\t}");
break;
@@ -352,12 +354,18 @@ print_endstates(FILE *f,
/* no end states */
if (!ir_hasend(ir)) {
- fprintf(f, "\treturn 0; /* unexpected EOT */\n");
+ fprintf(f, "\treturn 0;");
+ if (opt->comments) {
+ fprintf(f, " /* unexpected EOT */");
+ }
+ fprintf(f, "\n");
return 0;
}
/* usual case */
- fprintf(f, "\t/* end states */\n");
+ if (opt->comments) {
+ fprintf(f, "\t/* end states */\n");
+ }
fprintf(f, "\tswitch (state) {\n");
for (i = 0; i < ir->n; i++) {
if (!ir->states[i].isend) {
@@ -374,6 +382,12 @@ print_endstates(FILE *f,
return -1;
}
+ if (-1 == print_hook_comment(f, opt, hooks,
+ ir->states[i].endids.ids, ir->states[i].endids.count))
+ {
+ return -1;
+ }
+
fprintf(f, "\n");
}
@@ -410,7 +424,7 @@ fsm_print_cfrag(FILE *f, const struct ir *ir,
fprintf(f, " /* e.g. \"");
escputs(f, opt, c_escputc_str, ir->states[i].example);
fprintf(f, "\" */");
- } else if (i == ir->start) {
+ } else if (i == ir->start && opt->comments) {
fprintf(f, " /* start */");
}
}
@@ -423,7 +437,11 @@ fsm_print_cfrag(FILE *f, const struct ir *ir,
fprintf(f, "\n");
}
fprintf(f, "\t\tdefault:\n");
- fprintf(f, "\t\t\t; /* unreached */\n");
+ fprintf(f, "\t\t\t;");
+ if (opt->comments) {
+ fprintf(f, " /* unreached */");
+ }
+ fprintf(f, "\n");
fprintf(f, "\t\t}\n");
if (ferror(f)) {
@@ -595,7 +613,11 @@ fsm_print_c(FILE *f,
}
if (ir->n == 0) {
- fprintf(f, "\treturn 0; /* no matches */\n");
+ fprintf(f, "\treturn 0;");
+ if (opt->comments) {
+ fprintf(f, " /* no matches */");
+ }
+ fprintf(f, "\n");
} else {
if (-1 == fsm_print_c_body(f, ir, opt, hooks)) {
return -1;
diff --git a/src/libfsm/print/dot.c b/src/libfsm/print/dot.c
index aeb79b254..f4168e1d3 100644
--- a/src/libfsm/print/dot.c
+++ b/src/libfsm/print/dot.c
@@ -265,12 +265,22 @@ print_dotfrag(FILE *f,
return -1;
}
+ if (opt->comments && hooks->comment != NULL) {
+ fprintf(f, ",");
+
+ if (-1 == print_hook_comment(f, opt, hooks,
+ ids, count))
+ {
+ return -1;
+ }
+ }
+
fprintf(f, " ];\n");
f_free(fsm->alloc, ids);
}
- /* TODO: show example here, unless !opt->comments */
+ /* TODO: comment example per state */
if (-1 == print_state(f, opt, hooks, fsm, prefix, s)) {
return -1;
diff --git a/src/libfsm/print/fsm.c b/src/libfsm/print/fsm.c
index cd42e01a3..0c93e4338 100644
--- a/src/libfsm/print/fsm.c
+++ b/src/libfsm/print/fsm.c
@@ -364,7 +364,13 @@ fsm_print_fsm(FILE *f,
if (-1 == print_hook_accept(f, opt, hooks,
ids, count,
default_accept,
- NULL))
+ NULL))
+ {
+ return -1;
+ }
+
+ if (-1 == print_hook_comment(f, opt, hooks,
+ ids, count))
{
return -1;
}
diff --git a/src/libfsm/print/go.c b/src/libfsm/print/go.c
index ee87538a8..50195b69f 100644
--- a/src/libfsm/print/go.c
+++ b/src/libfsm/print/go.c
@@ -25,6 +25,7 @@
#include "libfsm/internal.h"
#include "libfsm/print.h"
+#include "libfsm/vm/retlist.h"
#include "libfsm/vm/vm.h"
static const char *
@@ -47,7 +48,8 @@ cmp_operator(int cmp)
static int
print_ids(FILE *f,
- enum fsm_ambig ambig, const fsm_end_id_t *ids, size_t count)
+ enum fsm_ambig ambig, const fsm_end_id_t *ids, size_t count,
+ size_t i)
{
switch (ambig) {
case AMBIG_NONE:
@@ -60,7 +62,7 @@ print_ids(FILE *f,
return -1;
}
- fprintf(f, ", %u;", ids[0]);
+ fprintf(f, ", %u", ids[0]);
break;
case AMBIG_EARLIEST:
@@ -68,12 +70,12 @@ print_ids(FILE *f,
* The libfsm api guarentees these ids are unique,
* and only appear once each, and are sorted.
*/
- fprintf(f, ", %u;", ids[0]);
+ fprintf(f, ", %u", ids[0]);
break;
case AMBIG_MULTIPLE:
- assert(!"unimplemented");
- abort();
+ fprintf(f, ", ret%zu", i);
+ break;
default:
assert(!"unreached");
@@ -88,16 +90,19 @@ default_accept(FILE *f, const struct fsm_options *opt,
const fsm_end_id_t *ids, size_t count,
void *lang_opaque, void *hook_opaque)
{
+ size_t i;
+
assert(f != NULL);
assert(opt != NULL);
- assert(lang_opaque == NULL);
+ assert(lang_opaque != NULL);
- (void) lang_opaque;
(void) hook_opaque;
+ i = * (const size_t *) lang_opaque;
+
fprintf(f, "return true");
- if (-1 == print_ids(f, opt->ambig, ids, count)) {
+ if (-1 == print_ids(f, opt->ambig, ids, count, i)) {
return -1;
}
@@ -115,7 +120,30 @@ default_reject(FILE *f, const struct fsm_options *opt,
(void) lang_opaque;
(void) hook_opaque;
- fprintf(f, "{\n\t\treturn false\n\t}\n");
+ fprintf(f, "{\n\t\treturn false");
+
+ switch (opt->ambig) {
+ case AMBIG_NONE:
+ break;
+
+ case AMBIG_ERROR:
+ fprintf(f, ", 0");
+ break;
+
+ case AMBIG_EARLIEST:
+ fprintf(f, ", 0");
+ break;
+
+ case AMBIG_MULTIPLE:
+ fprintf(f, ", nil");
+ break;
+
+ default:
+ assert(!"unreached");
+ abort();
+ }
+
+ fprintf(f, "\n\t}\n");
return 0;
}
@@ -150,20 +178,32 @@ static int
print_end(FILE *f, const struct dfavm_op_ir *op,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
enum dfavm_op_end end_bits)
{
+ size_t i;
+
switch (end_bits) {
case VM_END_FAIL:
return print_hook_reject(f, opt, hooks, default_reject, NULL);
case VM_END_SUCC:
+ assert(op->ret >= retlist->a);
+
+ i = op->ret - retlist->a;
+
fprintf(f, "{\n");
fprintf(f, "\t\t");
if (-1 == print_hook_accept(f, opt, hooks,
- op->endids.ids, op->endids.count,
- default_accept,
- NULL))
+ op->ret->ids, op->ret->count,
+ default_accept, &i))
+ {
+ return -1;
+ }
+
+ if (-1 == print_hook_comment(f, opt, hooks,
+ op->ret->ids, op->ret->count))
{
return -1;
}
@@ -198,11 +238,27 @@ print_fetch(FILE *f, const struct fsm_options *opt)
}
}
+static void
+print_ret(FILE *f, const unsigned *ids, size_t count)
+{
+ size_t i;
+
+ fprintf(f, "[]uint{");
+ for (i = 0; i < count; i++) {
+ fprintf(f, "%u", ids[i]);
+ if (i + 1 < count) {
+ fprintf(f, ", ");
+ }
+ }
+ fprintf(f, "}");
+}
+
/* TODO: eventually to be non-static */
static int
fsm_print_gofrag(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops,
const char *cp)
{
@@ -210,6 +266,7 @@ fsm_print_gofrag(FILE *f,
assert(f != NULL);
assert(opt != NULL);
+ assert(retlist != NULL);
assert(cp != NULL);
/* TODO: we'll need to heed cp for e.g. lx's codegen */
@@ -272,12 +329,12 @@ fsm_print_gofrag(FILE *f,
switch (op->instr) {
case VM_OP_STOP:
print_cond(f, op, opt);
- print_end(f, op, opt, hooks, op->u.stop.end_bits);
+ print_end(f, op, opt, hooks, retlist, op->u.stop.end_bits);
break;
case VM_OP_FETCH:
print_fetch(f, opt);
- print_end(f, op, opt, hooks, op->u.fetch.end_bits);
+ print_end(f, op, opt, hooks, retlist, op->u.fetch.end_bits);
break;
case VM_OP_BRANCH:
@@ -300,6 +357,7 @@ int
fsm_print_go(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops)
{
const char *prefix;
@@ -311,6 +369,7 @@ fsm_print_go(FILE *f,
assert(f != NULL);
assert(opt != NULL);
assert(hooks != NULL);
+ assert(retlist != NULL);
if (opt->prefix != NULL) {
prefix = opt->prefix;
@@ -325,13 +384,22 @@ fsm_print_go(FILE *f,
}
if (opt->fragment) {
- if (-1 == fsm_print_gofrag(f, opt, hooks, ops, cp)) {
+ if (-1 == fsm_print_gofrag(f, opt, hooks, retlist, ops, cp)) {
return -1;
}
} else {
fprintf(f, "package %sfsm\n", package_prefix);
fprintf(f, "\n");
+ if (opt->ambig == AMBIG_MULTIPLE) {
+ for (size_t i = 0; i < retlist->count; i++) {
+ fprintf(f, "var ret%zu []uint = ", i);
+ print_ret(f, retlist->a[i].ids, retlist->a[i].count);
+ fprintf(f, "\n");
+ }
+ fprintf(f, "\n");
+ }
+
fprintf(f, "func %sMatch", prefix);
switch (opt->io) {
@@ -362,17 +430,16 @@ fsm_print_go(FILE *f,
case AMBIG_NONE:
fprintf(f, "bool");
break;
-
+
case AMBIG_ERROR:
case AMBIG_EARLIEST:
fprintf(f, "(bool, uint)");
break;
case AMBIG_MULTIPLE:
- // TODO: fprintf(f, "(bool, uint[])");
- errno = ENOTSUP;
- return -1;
-
+ fprintf(f, "(bool, []uint)"); /* (matched, ids); written to f like the rest of the signature */
+ break;
+
default:
assert(!"unreached");
abort();
@@ -380,7 +447,7 @@ fsm_print_go(FILE *f,
fprintf(f, " {\n");
- if (-1 == fsm_print_gofrag(f, opt, hooks, ops, cp)) {
+ if (-1 == fsm_print_gofrag(f, opt, hooks, retlist, ops, cp)) {
return -1;
}
diff --git a/src/libfsm/print/irdot.c b/src/libfsm/print/irdot.c
index 186365203..cff37ee86 100644
--- a/src/libfsm/print/irdot.c
+++ b/src/libfsm/print/irdot.c
@@ -215,8 +215,6 @@ print_state(FILE *f,
fprintf(f, "\n");
}
- /* TODO: leaf callback for dot output */
-
/* showing hook in addition to existing content */
if (cs->isend && hooks->accept != NULL) {
fprintf(f, "\t\t
");
diff --git a/src/libfsm/print/llvm.c b/src/libfsm/print/llvm.c
index ac87090df..ec931769c 100644
--- a/src/libfsm/print/llvm.c
+++ b/src/libfsm/print/llvm.c
@@ -28,32 +28,25 @@
#include "libfsm/internal.h"
#include "libfsm/print.h"
+#include "libfsm/vm/retlist.h"
#include "libfsm/vm/vm.h"
#define OPAQUE_POINTERS 1
-#ifdef OPAQUE_POINTERS // llvm >= 15
+#if OPAQUE_POINTERS // llvm >= 15
static const char *ptr_i8 = "ptr";
static const char *ptr_i32 = "ptr";
static const char *ptr_void = "ptr";
+static const char *ptr_rt = "ptr";
#else
static const char *ptr_i8 = "i8*";
static const char *ptr_i32 = "i32*";
static const char *ptr_void = "i8*";
+static const char *ptr_rt = "%rt*";
#endif
static const struct dfavm_op_ir fail; // used as a unqiue address only
-struct ret {
- size_t count;
- const fsm_end_id_t *ids;
-};
-
-struct ret_list {
- size_t count;
- struct ret *a;
-};
-
/*
* If we had a stack, the current set of live values would be a frame.
* We're a DFA, so we don't have a stack. But I still think of them as a frame.
@@ -100,19 +93,16 @@ default_accept(FILE *f, const struct fsm_options *opt,
const fsm_end_id_t *ids, size_t count,
void *lang_opaque, void *hook_opaque)
{
- size_t i;
-
assert(f != NULL);
assert(opt != NULL);
- assert(lang_opaque != NULL);
+ assert(lang_opaque == NULL);
(void) hook_opaque;
-
- i = * (const size_t *) lang_opaque;
+ (void) lang_opaque;
switch (opt->ambig) {
case AMBIG_NONE:
- fprintf(f, "[true, %%ret%zu],\n", i);
+ fprintf(f, "%%rt true");
break;
case AMBIG_ERROR:
@@ -122,7 +112,7 @@ default_accept(FILE *f, const struct fsm_options *opt,
return -1;
}
- fprintf(f, "[{ true, %u }, %%ret%zu],\n", ids[0], i);
+ fprintf(f, "%%rt { i1 true, i32 %u }", ids[0]);
break;
case AMBIG_EARLIEST:
@@ -130,13 +120,19 @@ default_accept(FILE *f, const struct fsm_options *opt,
* The libfsm api guarentees these ids are unique,
* and only appear once each, and are sorted.
*/
- fprintf(f, "[{ true, i32 %u }, %%ret%zu],\n", ids[0], i);
+ fprintf(f, "%%rt { i1 true, i32 %u }", ids[0]);
break;
case AMBIG_MULTIPLE:
- // TODO: probably { i1, ptr_u8 }
- assert(!"unimplemented");
- abort();
+ fprintf(f, "internal unnamed_addr constant [%zu x i32] [", count);
+ for (size_t j = 0; j < count; j++) {
+ fprintf(f, "i32 %u", ids[j]);
+ if (j + 1 < count) {
+ fprintf(f, ", ");
+ }
+ }
+ fprintf(f, "]");
+ break;
default:
assert(!"unreached");
@@ -159,13 +155,16 @@ default_reject(FILE *f, const struct fsm_options *opt,
switch (opt->ambig) {
case AMBIG_NONE:
- fprintf(f, "[false, %%fail]\n");
+ fprintf(f, "%%rt false");
break;
case AMBIG_ERROR:
case AMBIG_EARLIEST:
+ fprintf(f, "%%rt { i1 false, i32 poison }");
+ break;
+
case AMBIG_MULTIPLE:
- fprintf(f, "[{ false, undef }, %%fail]\n");
+ fprintf(f, "%%rt { %s poison, i64 -1 }", ptr_i32);
break;
default:
@@ -173,12 +172,18 @@ default_reject(FILE *f, const struct fsm_options *opt,
abort();
}
+ if (opt->comments) {
+ fprintf(f, " ; fail");
+ }
+
return 0;
}
static int
-print_rettype(FILE *f, enum fsm_ambig ambig)
+print_rettype(FILE *f, const char *name, enum fsm_ambig ambig)
{
+ fprintf(f, "%s = type ", name);
+
switch (ambig) {
case AMBIG_NONE:
fprintf(f, "i1");
@@ -186,19 +191,22 @@ print_rettype(FILE *f, enum fsm_ambig ambig)
case AMBIG_ERROR:
case AMBIG_EARLIEST:
- fprintf(f, "{ i1, u32 }");
+ // success, id
+ fprintf(f, "{ i1, i32 }");
break;
case AMBIG_MULTIPLE:
- // TODO: probably { i1, ptr_u8 }
- assert(!"unimplemented");
- abort();
+ // ids, -1/count
+ fprintf(f, "{ %s, i64 }", ptr_i32);
+ break;
default:
assert(!"unreached");
abort();
}
+ fprintf(f, "\n");
+
return 0;
}
@@ -267,8 +275,11 @@ print_cond(FILE *f, const struct fsm_options *opt, struct dfavm_op_ir *op,
fprintf(f, "icmp %s i8 %%c%u, ",
cmp_operator(op->cmp), use(&frame->c));
llvm_escputcharlit(f, opt, op->cmp_arg);
- fprintf(f, " ; ");
- c_escputcharlit(f, opt, op->cmp_arg); // C escaping for a comment
+
+ if (opt->comments) {
+ fprintf(f, " ; ");
+ c_escputcharlit(f, opt, op->cmp_arg); // C escaping for a comment
+ }
fprintf(f, "\n");
}
@@ -328,7 +339,11 @@ print_fetch(FILE *f, const struct fsm_options *opt,
ptr_i8);
print_decl(f, "r", decl(&frame->r));
- fprintf(f, "icmp eq i32 %%i%u, -1 ; EOF\n", n);
+ fprintf(f, "icmp eq i32 %%i%u, -1", n);
+ if (opt->comments) {
+ fprintf(f, " ; EOF");
+ }
+ fprintf(f, "\n");
// XXX: we don't distinguish error from eof
// https://github.com/katef/libfsm/issues/484
@@ -358,8 +373,11 @@ print_fetch(FILE *f, const struct fsm_options *opt,
ptr_i8, n);
print_decl(f, "r", decl(&frame->r));
- fprintf(f, "icmp eq i8 %%c%u, 0 ; EOT\n",
- n);
+ fprintf(f, "icmp eq i8 %%c%u, 0", n);
+ if (opt->comments) {
+ fprintf(f, " ; EOT");
+ }
+ fprintf(f, "\n");
print_branch(f, frame,
end_bits == VM_END_FAIL ? &fail : NULL,
@@ -381,8 +399,11 @@ print_fetch(FILE *f, const struct fsm_options *opt,
ptr_i8, n);
print_decl(f, "r", decl(&frame->r));
- fprintf(f, "icmp eq %s %%p%u, %%e ; EOF\n",
- ptr_i8, n);
+ fprintf(f, "icmp eq %s %%p%u, %%e", ptr_i8, n);
+ if (opt->comments) {
+ fprintf(f, " ; EOF"); /* same annotation as before comments were made optional */
+ }
+ fprintf(f, "\n");
print_branch(f, frame,
end_bits == VM_END_FAIL ? &fail : NULL,
@@ -410,127 +431,22 @@ print_fetch(FILE *f, const struct fsm_options *opt,
}
}
-static bool
-append_ret(struct ret_list *list,
- const fsm_end_id_t *ids, size_t count)
-{
- const size_t low = 16; /* must be power of 2 */
- const size_t factor = 2; /* must be even */
-
- assert(list != NULL);
-
- if (list->count == 0) {
- list->a = malloc(low * sizeof *list->a);
- if (list->a == NULL) {
- return false;
- }
- } else if (list->count >= low && (list->count & (list->count - 1)) == 0) {
- void *tmp;
- size_t new = list->count * factor;
- if (new < list->count) {
- errno = E2BIG;
- perror("realloc");
- exit(EXIT_FAILURE);
- }
-
- tmp = realloc(list->a, new * sizeof *list->a);
- if (tmp == NULL) {
- return false;
- }
-
- list->a = tmp;
- }
-
- list->a[list->count].ids = ids;
- list->a[list->count].count = count;
-
- list->count++;
-
- return true;
-}
-
-static int
-cmp_ret_by_endid(const void *pa, const void *pb)
-{
- const struct ret *a = pa;
- const struct ret *b = pb;
-
- if (a->count < b->count) { return -1; }
- if (a->count > b->count) { return +1; }
-
- return memcmp(a->ids, b->ids, a->count * sizeof *a->ids);
-}
-
-static struct ret *
-find_ret(const struct ret_list *list, const struct dfavm_op_ir *op,
- int (*cmp)(const void *pa, const void *pb))
-{
- struct ret key;
-
- assert(op != NULL);
- assert(cmp != NULL);
-
- key.count = op->endids.count;
- key.ids = op->endids.ids;
-
- return bsearch(&key, list->a, list->count, sizeof *list->a, cmp);
-}
-
-static bool
-build_retlist(struct ret_list *list, const struct dfavm_op_ir *a)
-{
- const struct dfavm_op_ir *op;
-
- assert(list != NULL);
-
- for (op = a; op != NULL; op = op->next) {
- switch (op->instr) {
- case VM_OP_STOP:
- if (op->u.stop.end_bits == VM_END_FAIL) {
- /* %fail is special, don't add to retlist */
- continue;
- }
-
- break;
-
- case VM_OP_FETCH:
- if (op->u.fetch.end_bits == VM_END_FAIL) {
- /* %fail is special, don't add to retlist */
- continue;
- }
-
- break;
-
- case VM_OP_BRANCH:
- continue;
-
- default:
- assert(!"unreached");
- abort();
- }
-
- if (!append_ret(list, op->endids.ids, op->endids.count)) {
- return false;
- }
- }
-
- return true;
-}
-
/* TODO: eventually to be non-static */
static int
fsm_print_llvmfrag(FILE *f,
const struct fsm_options *opt,
- const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops,
- const char *cp)
+ const char *cp,
+ const char *prefix)
{
- struct ret_list retlist;
struct dfavm_op_ir *op;
assert(f != NULL);
assert(opt != NULL);
+ assert(retlist != NULL);
assert(cp != NULL);
+ assert(prefix != NULL);
/* TODO: we'll need to heed cp for e.g. lx's codegen */
(void) cp;
@@ -546,33 +462,6 @@ fsm_print_llvmfrag(FILE *f,
}
{
- retlist.count = 0;
- build_retlist(&retlist, ops);
-
- if (retlist.count > 0) {
- size_t j = 0;
-
- /* sort for both dedup and bsearch */
- qsort(retlist.a, retlist.count, sizeof *retlist.a, cmp_ret_by_endid);
-
- /* deduplicate based on endids only.
- * j is the start of a run; i increments until we find
- * the start of the next run */
- for (size_t i = 1; i < retlist.count; i++) {
- assert(i > j);
- if (cmp_ret_by_endid(&retlist.a[j], &retlist.a[i]) == 0) {
- continue;
- }
-
- j++;
- retlist.a[j] = retlist.a[i];
- }
-
- retlist.count = j + 1;
-
- assert(retlist.count > 0);
- }
-
print_jump(f, ops);
/*
@@ -582,12 +471,14 @@ fsm_print_llvmfrag(FILE *f,
* This looks like:
*
* stop:
- * %ret = phi i1
- * [true, %ret0], ; "abc"
- * [true, %ret1], ; "xyz"
- * [true, %ret2], ; "abc", "xyz"
- * [false, %fail]
- * ret i1 %ret
+ * %i = phi i64
+ * [0, %ret0],
+ * [1, %ret1],
+ * [2, %ret2],
+ * [3, %fail]
+ * %p = getelementptr inbounds [4 x %rt], [4 x %rt]* @fsm.r, i64 0, i64 %i
+ * %ret = load %rt, ptr %p
+ * ret %rt %ret
* fail:
* br label %stop
* ret0:
@@ -597,6 +488,8 @@ fsm_print_llvmfrag(FILE *f,
* ret2:
* br label %stop
*
+ * where @fsm.r is [4 x %rt] and %rt is the return type.
+ *
* And we jump to stop: via the ret*: labels rather than
* to a phi node directly. This helps for two reasons:
*
@@ -612,36 +505,23 @@ fsm_print_llvmfrag(FILE *f,
*/
print_label(f, true, "stop");
- fprintf(f, "\t%%ret = phi ");
- print_rettype(f, opt->ambig);
- fprintf(f, "\n");
-
- for (size_t i = 0; i < retlist.count; i++) {
- fprintf(f, "\t ");
-
- if (-1 == print_hook_accept(f, opt, hooks,
- retlist.a[i].ids, retlist.a[i].count,
- default_accept, &i))
- {
- return -1;
- }
+ fprintf(f, "\t%%i = phi i64\n");
+ for (size_t i = 0; i < retlist->count; i++) {
+ fprintf(f, "\t [%zu, %%ret%zu],\n", i, i);
}
+ fprintf(f, "\t [%zu, %%fail]\n", retlist->count);
- fprintf(f, "\t ");
- if (-1 == print_hook_reject(f, opt, hooks, default_reject, NULL)) {
- return -1;
- }
-
- fprintf(f, "\tret ");
- print_rettype(f, opt->ambig);
- fprintf(f, " %%ret\n");
+ fprintf(f, "\t%%p = getelementptr inbounds [%zu x %%rt], [%zu x %%rt]* @%sr, i64 0, i64 %%i\n",
+ retlist->count + 1, retlist->count + 1, prefix);
+ fprintf(f, "\t%%ret = load %%rt, %s %%p\n", ptr_rt);
+ fprintf(f, "\tret %%rt %%ret\n");
print_label(f, true, "fail");
fprintf(f, "\tbr ");
print_label(f, false, "stop");
fprintf(f, "\n");
- for (size_t i = 0; i < retlist.count; i++) {
+ for (size_t i = 0; i < retlist->count; i++) {
print_label(f, true, "ret%zu", i);
fprintf(f, "\tbr ");
print_label(f, false, "stop");
@@ -653,16 +533,22 @@ fsm_print_llvmfrag(FILE *f,
for (op = ops; op != NULL; op = op->next) {
if (op->instr != VM_OP_STOP || op->cmp != VM_CMP_ALWAYS || op->u.stop.end_bits != VM_END_FAIL) {
print_label(f, true, "l%" PRIu32, op->index);
- }
- if (op->example != NULL) {
- /* C's escaping seems to be a subset of llvm's, and these are
- * for comments anyway. So I'm borrowing this for C here */
- fprintf(f, "\t; e.g. \"");
- escputs(f, opt, c_escputc_str, op->example);
- fprintf(f, "\"");
+ /*
+ * We only show examples when there's a label for the block,
+ * otherwise it's confusing with the conditionally elided
+ * optimisations per-instruction below, which can result in
+ * no block code being emitted for a particular vm op.
+ */
+ if (op->example != NULL) {
+ /* C's escaping seems to be a subset of llvm's, and these are
+ * for comments anyway. So I'm borrowing this for C here */
+ fprintf(f, "\t; e.g. \"");
+ escputs(f, opt, c_escputc_str, op->example);
+ fprintf(f, "\"");
- fprintf(f, "\n");
+ fprintf(f, "\n");
+ }
}
switch (op->instr) {
@@ -683,14 +569,12 @@ fsm_print_llvmfrag(FILE *f,
if (op->u.stop.end_bits == VM_END_FAIL) {
/* handled above */
} else {
- assert(retlist.count > 0);
- const struct ret *ret = find_ret(&retlist, op, cmp_ret_by_endid);
+ assert(retlist->count > 0);
+ const struct ret *ret = op->ret;
assert(ret != NULL);
- assert(ret >= retlist.a && ret <= (retlist.a + retlist.count));
- assert(ret->count == op->endids.count);
- assert(0 == memcmp(ret->ids, op->endids.ids, ret->count));
+ assert(ret >= retlist->a && ret < (retlist->a + retlist->count)); /* must point at an element of retlist->a[] */
fprintf(f, "\tbr ");
- print_label(f, false, "ret%u", ret - retlist.a);
+ print_label(f, false, "ret%zu", (size_t) (ret - retlist->a)); /* same "ret%zu" form as where the labels are declared */
fprintf(f, "\n");
}
break;
@@ -701,14 +585,12 @@ fsm_print_llvmfrag(FILE *f,
if (op->u.fetch.end_bits == VM_END_FAIL) {
/* handled in print_fetch() */
} else {
- assert(retlist.count > 0);
- const struct ret *ret = find_ret(&retlist, op, cmp_ret_by_endid);
+ assert(retlist->count > 0);
+ const struct ret *ret = op->ret;
assert(ret != NULL);
- assert(ret >= retlist.a && ret <= (retlist.a + retlist.count));
- assert(ret->count == op->endids.count);
- assert(0 == memcmp(ret->ids, op->endids.ids, ret->count));
+ assert(ret >= retlist->a && ret < (retlist->a + retlist->count)); /* must point at an element of retlist->a[] */
fprintf(f, "\tbr ");
- print_label(f, false, "ret%u", ret - retlist.a);
+ print_label(f, false, "ret%zu", (size_t) (ret - retlist->a)); /* same "ret%zu" form as where the labels are declared */
fprintf(f, "\n");
}
break;
@@ -734,10 +616,6 @@ fsm_print_llvmfrag(FILE *f,
}
}
- if (retlist.count > 0) {
- free(retlist.a);
- }
-
return 0;
}
@@ -745,6 +623,7 @@ int
fsm_print_llvm(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops)
{
const char *prefix;
@@ -753,11 +632,12 @@ fsm_print_llvm(FILE *f,
assert(f != NULL);
assert(opt != NULL);
assert(hooks != NULL);
+ assert(retlist != NULL);
if (opt->prefix != NULL) {
prefix = opt->prefix;
} else {
- prefix = "fsm_";
+ prefix = "fsm.";
}
if (hooks->cp != NULL) {
@@ -767,13 +647,71 @@ fsm_print_llvm(FILE *f,
}
if (opt->fragment) {
- fsm_print_llvmfrag(f, opt, hooks, ops, cp);
+ fsm_print_llvmfrag(f, opt, retlist, ops, cp, prefix);
return 0;
}
fprintf(f, "; generated\n");
-//XXX: type depends on ambig
- fprintf(f, "define dso_local i1 @%smain", prefix);
+ print_rettype(f, "%rt", opt->ambig);
+
+ /*
+ * For AMBIG_MULTIPLE we emit a bunch of arrays and then point at them from
+ * each %rt. So we call the hook for the arrays, because that's where the id
+ * list is. For other ambig modes, we call the hook for the %rt instead.
+ */
+ if (opt->ambig == AMBIG_MULTIPLE) {
+ for (size_t i = 0; i < retlist->count; i++) {
+ fprintf(f, "@%sr%zu = ", prefix, i);
+ if (-1 == print_hook_accept(f, opt, hooks,
+ retlist->a[i].ids, retlist->a[i].count,
+ default_accept, NULL))
+ {
+ return -1;
+ }
+
+ if (-1 == print_hook_comment(f, opt, hooks,
+ retlist->a[i].ids, retlist->a[i].count))
+ {
+ return -1;
+ }
+
+ fprintf(f, "\n");
+ }
+ }
+
+ fprintf(f, "@%sr = internal unnamed_addr constant [%zu x %%rt] [\n", prefix, retlist->count + 1);
+ for (size_t i = 0; i < retlist->count; i++) {
+ fprintf(f, "\t ");
+ if (opt->ambig == AMBIG_MULTIPLE) {
+ fprintf(f, "%%rt { %s bitcast ([%zu x i32]* @%sr%zu to %s), i64 %zu }",
+ ptr_i32, retlist->a[i].count, prefix, i, ptr_i32, retlist->a[i].count);
+ fprintf(f, ",");
+ } else {
+ if (-1 == print_hook_accept(f, opt, hooks,
+ retlist->a[i].ids, retlist->a[i].count,
+ default_accept, NULL))
+ {
+ return -1;
+ }
+
+ fprintf(f, ",");
+
+ if (-1 == print_hook_comment(f, opt, hooks,
+ retlist->a[i].ids, retlist->a[i].count))
+ {
+ return -1;
+ }
+ }
+ fprintf(f, "\n");
+ }
+ fprintf(f, "\t ");
+ if (-1 == print_hook_reject(f, opt, hooks, default_reject, NULL)) {
+ return -1;
+ }
+ fprintf(f, "\n");
+ fprintf(f, "\t]\n");
+
+ fprintf(f, "define dso_local %%rt @%smain", prefix);
switch (opt->io) {
case FSM_IO_GETC:
@@ -824,7 +762,7 @@ fsm_print_llvm(FILE *f,
exit(EXIT_FAILURE);
}
- fsm_print_llvmfrag(f, opt, hooks, ops, cp);
+ fsm_print_llvmfrag(f, opt, retlist, ops, cp, prefix);
fprintf(f, "}\n");
fprintf(f, "\n");
diff --git a/src/libfsm/print/rust.c b/src/libfsm/print/rust.c
index 5c35747d9..b11fb2c5a 100644
--- a/src/libfsm/print/rust.c
+++ b/src/libfsm/print/rust.c
@@ -26,6 +26,7 @@
#include "libfsm/internal.h"
#include "libfsm/print.h"
+#include "libfsm/vm/retlist.h"
#include "libfsm/vm/vm.h"
#define START UINT32_MAX
@@ -50,7 +51,8 @@ cmp_operator(int cmp)
static int
print_ids(FILE *f,
- enum fsm_ambig ambig, const fsm_end_id_t *ids, size_t count)
+ enum fsm_ambig ambig, const fsm_end_id_t *ids, size_t count,
+ size_t i)
{
switch (ambig) {
case AMBIG_NONE:
@@ -62,8 +64,8 @@ print_ids(FILE *f,
if (count > 1) {
errno = EINVAL;
return -1;
- }
-
+ }
+
fprintf(f, "return Some(%u)", ids[0]);
break;
@@ -74,16 +76,16 @@ print_ids(FILE *f,
*/
fprintf(f, "return Some(%u)", ids[0]);
break;
-
+
case AMBIG_MULTIPLE:
- assert(!"unimplemented");
- abort();
-
+ fprintf(f, "return Some(&RET%zu)", i);
+ break;
+
default:
assert(!"unreached");
abort();
}
-
+
return 0;
}
@@ -91,29 +93,32 @@ static int
default_accept(FILE *f, const struct fsm_options *opt,
const fsm_end_id_t *ids, size_t count,
void *lang_opaque, void *hook_opaque)
-{
+{
+ size_t i;
+
assert(f != NULL);
assert(opt != NULL);
- assert(lang_opaque == NULL);
-
- (void) lang_opaque;
+ assert(lang_opaque != NULL);
+
(void) hook_opaque;
-
- if (-1 == print_ids(f, opt->ambig, ids, count)) {
+
+ i = * (const size_t *) lang_opaque;
+
+ if (-1 == print_ids(f, opt->ambig, ids, count, i)) {
return -1;
}
return 0;
}
-static int
+static int
default_reject(FILE *f, const struct fsm_options *opt,
void *lang_opaque, void *hook_opaque)
{
assert(f != NULL);
assert(opt != NULL);
assert(lang_opaque == NULL);
-
+
(void) lang_opaque;
(void) hook_opaque;
@@ -162,17 +167,24 @@ static int
print_end(FILE *f, const struct dfavm_op_ir *op,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
enum dfavm_op_end end_bits)
{
+ size_t i;
+
switch (end_bits) {
case VM_END_FAIL:
return print_hook_reject(f, opt, hooks, default_reject, NULL);
case VM_END_SUCC:
+ assert(op->ret >= retlist->a);
+
+ i = op->ret - retlist->a;
+
return print_hook_accept(f, opt, hooks,
- op->endids.ids, op->endids.count,
+ op->ret->ids, op->ret->count,
default_accept,
- NULL);
+ &i);
default:
assert(!"unreached");
@@ -200,11 +212,27 @@ print_fetch(FILE *f)
fprintf(f, "bytes.next()");
}
+static void
+print_ret(FILE *f, const unsigned *ids, size_t count)
+{
+ size_t i;
+
+ fprintf(f, "[");
+ for (i = 0; i < count; i++) {
+ fprintf(f, "%u", ids[i]);
+ if (i + 1 < count) {
+ fprintf(f, ", ");
+ }
+ }
+ fprintf(f, "];");
+}
+
/* TODO: eventually to be non-static */
static int
fsm_print_rustfrag(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops,
const char *cp)
{
@@ -213,11 +241,21 @@ fsm_print_rustfrag(FILE *f,
assert(f != NULL);
assert(opt != NULL);
+ assert(retlist != NULL);
assert(cp != NULL);
/* TODO: we'll need to heed cp for e.g. lx's codegen */
(void) cp;
+ if (opt->ambig == AMBIG_MULTIPLE) {
+ for (size_t i = 0; i < retlist->count; i++) {
+ fprintf(f, " static RET%zu: [u32; %zu] = ", i, retlist->a[i].count);
+ print_ret(f, retlist->a[i].ids, retlist->a[i].count);
+ fprintf(f, "\n");
+ }
+ fprintf(f, "\n");
+ }
+
/*
* We only output labels for ops which are branched to. This gives
* gaps in the sequence for ops which don't need a label.
@@ -319,13 +357,21 @@ fsm_print_rustfrag(FILE *f,
if (op->cmp != VM_CMP_ALWAYS) {
fprintf(f, "{ ");
}
- if (-1 == print_end(f, op, opt, hooks, op->u.stop.end_bits)) {
+ if (-1 == print_end(f, op, opt, hooks, retlist, op->u.stop.end_bits)) {
return -1;
}
if (op->cmp != VM_CMP_ALWAYS) {
fprintf(f, " }");
}
+ if (op->u.stop.end_bits == VM_END_SUCC) {
+ if (-1 == print_hook_comment(f, opt, hooks,
+ op->ret->ids, op->ret->count))
+ {
+ return -1;
+ }
+ }
+
if (op->cmp == VM_CMP_ALWAYS) {
/* the code for fallthrough would be unreachable */
fallthrough = false;
@@ -367,7 +413,7 @@ fsm_print_rustfrag(FILE *f,
fprintf(f, " ");
fprintf(f, "None => ");
- print_end(f, op, opt, hooks, op->u.fetch.end_bits);
+ print_end(f, op, opt, hooks, retlist, op->u.fetch.end_bits);
fprintf(f, ",\n");
fprintf(f, " ");
@@ -415,6 +461,7 @@ int
fsm_print_rust(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops)
{
const char *prefix;
@@ -423,6 +470,7 @@ fsm_print_rust(FILE *f,
assert(f != NULL);
assert(opt != NULL);
assert(hooks != NULL);
+ assert(retlist != NULL);
if (opt->prefix != NULL) {
prefix = opt->prefix;
@@ -437,7 +485,7 @@ fsm_print_rust(FILE *f,
}
if (opt->fragment) {
- fsm_print_rustfrag(f, opt, hooks, ops, cp);
+ fsm_print_rustfrag(f, opt, hooks, retlist, ops, cp);
goto error;
}
@@ -448,22 +496,19 @@ fsm_print_rust(FILE *f,
switch (opt->io) {
case FSM_IO_GETC:
/* e.g. dbg!(fsm_main("abc".as_bytes().iter().copied())); */
- fprintf(f, "(mut bytes: impl Iterator<Item = u8>) -> Option<()> {\n");
- fprintf(f, " use Label::*;\n");
+ fprintf(f, "(mut bytes: impl Iterator<Item = u8>)");
break;
case FSM_IO_STR:
/* e.g. dbg!(fsm_main("xabces")); */
- fprintf(f, "(%sinput: &str) -> Option<()> {\n",
+ fprintf(f, "(%sinput: &str)",
has_op(ops, VM_OP_FETCH) ? "" : "_");
- fprintf(f, " use Label::*;\n");
break;
case FSM_IO_PAIR:
/* e.g. dbg!(fsm_main("xabces".as_bytes())); */
- fprintf(f, "(%sinput: &[u8]) -> Option<()> {\n",
+ fprintf(f, "(%sinput: &[u8])",
has_op(ops, VM_OP_FETCH) ? "" : "_");
- fprintf(f, " use Label::*;\n");
break;
default:
@@ -471,7 +516,27 @@ fsm_print_rust(FILE *f,
exit(EXIT_FAILURE);
}
- fsm_print_rustfrag(f, opt, hooks, ops, cp);
+ fprintf(f, " -> ");
+
+ switch (opt->ambig) {
+ case AMBIG_NONE:
+ case AMBIG_ERROR:
+ case AMBIG_EARLIEST:
+ fprintf(f, "Option<()>");
+ break;
+
+ case AMBIG_MULTIPLE:
+ fprintf(f, "Option<&'static [u32]>");
+ break;
+ default:
+ fprintf(stderr, "unsupported ambig mode\n");
+ exit(EXIT_FAILURE);
+ }
+
+ fprintf(f, " {\n");
+ fprintf(f, " use Label::*;\n");
+
+ fsm_print_rustfrag(f, opt, hooks, retlist, ops, cp);
fprintf(f, "}\n");
fprintf(f, "\n");
diff --git a/src/libfsm/print/sh.c b/src/libfsm/print/sh.c
index 05692df59..4b9c647c6 100644
--- a/src/libfsm/print/sh.c
+++ b/src/libfsm/print/sh.c
@@ -26,6 +26,7 @@
#include "libfsm/internal.h"
#include "libfsm/print.h"
+#include "libfsm/vm/retlist.h"
#include "libfsm/vm/vm.h"
static const char *
@@ -204,10 +205,21 @@ print_end(FILE *f, const struct dfavm_op_ir *op,
return print_hook_reject(f, opt, hooks, default_reject, NULL);
case VM_END_SUCC:
- return print_hook_accept(f, opt, hooks,
- op->endids.ids, op->endids.count,
+ if (-1 == print_hook_accept(f, opt, hooks,
+ op->ret->ids, op->ret->count,
default_accept,
- NULL);
+ NULL))
+ {
+ return -1;
+ }
+
+ if (-1 == print_hook_comment(f, opt, hooks,
+ op->ret->ids, op->ret->count))
+ {
+ return -1;
+ }
+
+ return 0;
default:
assert(!"unreached");
@@ -233,6 +245,7 @@ int
fsm_print_sh(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops)
{
struct dfavm_op_ir *op;
@@ -240,6 +253,7 @@ fsm_print_sh(FILE *f,
assert(f != NULL);
assert(opt != NULL);
assert(hooks != NULL);
+ assert(retlist != NULL);
if (opt->io != FSM_IO_STR) {
errno = ENOTSUP;
diff --git a/src/libfsm/print/vmasm.c b/src/libfsm/print/vmasm.c
index c5ac87492..dc9b76ee7 100644
--- a/src/libfsm/print/vmasm.c
+++ b/src/libfsm/print/vmasm.c
@@ -24,6 +24,7 @@
#include "libfsm/internal.h"
#include "libfsm/print.h"
+#include "libfsm/vm/retlist.h"
#include "libfsm/vm/vm.h"
enum asm_dialect {
@@ -50,11 +51,16 @@ print_end(FILE *f, const struct dfavm_op_ir *op,
case VM_END_SUCC:
if (-1 == print_hook_accept(f, opt, hooks,
- op->endids.ids, op->endids.count,
+ op->ret->ids, op->ret->count,
NULL, NULL))
{
return -1;
}
+ if (-1 == print_hook_comment(f, opt, hooks,
+ op->ret->ids, op->ret->count))
+ {
+ return -1;
+ }
break;
default:
@@ -152,12 +158,16 @@ print_asm_amd64(FILE *f,
switch (opt->io) {
case FSM_IO_STR:
- fprintf(f, "// func %s%s(data string) int\n", prefix, "Match");
+ if (opt->comments) {
+ fprintf(f, "// func %s%s(data string) int\n", prefix, "Match");
+ }
fprintf(f, "TEXT ·%s(SB), NOSPLIT, $0-24\n", "Match");
break;
case FSM_IO_PAIR:
- fprintf(f, "// func %s%s(data []byte) int\n", prefix, "Match");
+ if (opt->comments) {
+ fprintf(f, "// func %s%s(data []byte) int\n", prefix, "Match");
+ }
fprintf(f, "TEXT ·%s%s(SB), NOSPLIT, $0-32\n", prefix, "Match");
break;
@@ -194,7 +204,9 @@ print_asm_amd64(FILE *f,
for (op = ops; op != NULL; op = op->next) {
if (op->num_incoming > 0) {
fprintf(f, "%sl%u:\n", label_dot, op->index);
- } else {
+
+ // TODO: example
+ } else if (opt->comments) {
fprintf(f, "%s l%u\n", comment, op->index);
}
@@ -209,7 +221,9 @@ print_asm_amd64(FILE *f,
}
if (op->cmp == VM_CMP_ALWAYS && op->next == NULL) {
- fprintf(f, "\t%s elided jmp to %sfinish\n", comment, label_dot);
+ if (opt->comments) {
+ fprintf(f, "\t%s elided jmp to %sfinish\n", comment, label_dot);
+ }
} else {
const char *jmp_op;
@@ -358,6 +372,7 @@ static int
print_vmasm_encoding(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops,
enum asm_dialect dialect)
{
@@ -366,6 +381,7 @@ print_vmasm_encoding(FILE *f,
assert(f != NULL);
assert(opt != NULL);
assert(hooks != NULL);
+ assert(retlist != NULL);
if (dialect == AMD64_GO) {
if (opt->io != FSM_IO_STR && opt->io != FSM_IO_PAIR) {
@@ -392,26 +408,29 @@ int
fsm_print_amd64_att(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops)
{
- return print_vmasm_encoding(f, opt, hooks, ops, AMD64_ATT);
+ return print_vmasm_encoding(f, opt, hooks, retlist, ops, AMD64_ATT);
}
int
fsm_print_amd64_nasm(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops)
{
- return print_vmasm_encoding(f, opt, hooks, ops, AMD64_NASM);
+ return print_vmasm_encoding(f, opt, hooks, retlist, ops, AMD64_NASM);
}
int
fsm_print_amd64_go(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops)
{
- return print_vmasm_encoding(f, opt, hooks, ops, AMD64_GO);
+ return print_vmasm_encoding(f, opt, hooks, retlist, ops, AMD64_GO);
}
diff --git a/src/libfsm/print/vmc.c b/src/libfsm/print/vmc.c
index e2dd15bad..c43073dca 100644
--- a/src/libfsm/print/vmc.c
+++ b/src/libfsm/print/vmc.c
@@ -25,6 +25,7 @@
#include "libfsm/internal.h"
#include "libfsm/print.h"
+#include "libfsm/vm/retlist.h"
#include "libfsm/vm/vm.h"
static const char *
@@ -62,16 +63,18 @@ print_ids(FILE *f,
errno = EINVAL;
return -1;
}
-
- fprintf(f, "return %u;", ids[0]);
- break;
-
+
+ /* fallthrough */
+
case AMBIG_EARLIEST:
/*
* The libfsm api guarentees these ids are unique,
* and only appear once each, and are sorted.
*/
- fprintf(f, "return %u;", ids[0]);
+ fprintf(f, "{\n");
+ fprintf(f, "\t\t*id = %u;\n", ids[0]);
+ fprintf(f, "\t\treturn 1;\n");
+ fprintf(f, "\t}");
break;
case AMBIG_MULTIPLE:
@@ -93,7 +96,7 @@ print_ids(FILE *f,
fprintf(f, " };\n");
fprintf(f, "\t\t*ids = a;\n");
fprintf(f, "\t\t*count = %zu;\n", count);
- fprintf(f, "\t\treturn 0;\n");
+ fprintf(f, "\t\treturn 1;\n");
fprintf(f, "\t}");
break;
@@ -175,10 +178,21 @@ print_end(FILE *f, const struct dfavm_op_ir *op,
return print_hook_reject(f, opt, hooks, default_reject, NULL);
case VM_END_SUCC:
- return print_hook_accept(f, opt, hooks,
- op->endids.ids, op->endids.count,
+ if (-1 == print_hook_accept(f, opt, hooks,
+ op->ret->ids, op->ret->count,
default_accept,
- NULL);
+ NULL))
+ {
+ return -1;
+ }
+
+ if (-1 == print_hook_comment(f, opt, hooks,
+ op->ret->ids, op->ret->count))
+ {
+ return -1;
+ }
+
+ return 0;
default:
assert(!"unreached");
@@ -360,6 +374,7 @@ static int
fsm_print_cfrag(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops,
const char *cp)
{
@@ -367,6 +382,7 @@ fsm_print_cfrag(FILE *f,
assert(f != NULL);
assert(opt != NULL);
+ assert(retlist != NULL);
assert(cp != NULL);
/* TODO: we'll need to heed cp for e.g. lx's codegen */
@@ -512,6 +528,7 @@ int
fsm_print_vmc(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops)
{
const char *prefix;
@@ -522,6 +539,7 @@ fsm_print_vmc(FILE *f,
assert(f != NULL);
assert(opt != NULL);
assert(hooks != NULL);
+ assert(retlist != NULL);
if (opt->prefix != NULL) {
prefix = opt->prefix;
@@ -530,7 +548,7 @@ fsm_print_vmc(FILE *f,
}
if (opt->fragment) {
- if (-1 == fsm_print_cfrag(f, opt, hooks, ops, cp)) {
+ if (-1 == fsm_print_cfrag(f, opt, hooks, retlist, ops, cp)) {
return -1;
}
} else {
@@ -591,7 +609,7 @@ fsm_print_vmc(FILE *f,
fprintf(f, ")\n");
fprintf(f, "{\n");
- if (-1 == fsm_print_cfrag(f, opt, hooks, ops, cp)) {
+ if (-1 == fsm_print_cfrag(f, opt, hooks, retlist, ops, cp)) {
return -1;
}
diff --git a/src/libfsm/print/vmdot.c b/src/libfsm/print/vmdot.c
index 576d8c08d..e01fc9011 100644
--- a/src/libfsm/print/vmdot.c
+++ b/src/libfsm/print/vmdot.c
@@ -25,6 +25,7 @@
#include "libfsm/internal.h"
#include "libfsm/print.h"
+#include "libfsm/vm/retlist.h"
#include "libfsm/vm/vm.h"
static const char *
@@ -128,10 +129,17 @@ print_end(FILE *f,
return print_hook_reject(f, opt, hooks, default_reject, NULL);
case VM_END_SUCC:
- return print_hook_accept(f, opt, hooks,
- op->endids.ids, op->endids.count,
+ if (-1 == print_hook_accept(f, opt, hooks,
+ op->ret->ids, op->ret->count,
default_accept,
- NULL);
+ NULL))
+ {
+ return -1;
+ }
+
+ /* no print_hook_comment() for dot output */
+
+ return 0;
default:
assert(!"unreached");
@@ -267,9 +275,12 @@ fsm_print_edges(FILE *f, const struct fsm_options *opt, const struct dfavm_op_ir
if (op->num_incoming > 0 || op == ops) {
if (op != ops && can_fallthrough) {
fprintf(f, "\t");
- fprintf(f, "S%lu:s -> S%" PRIu32 ":n [ style = bold ]; /* fallthrough */",
+ fprintf(f, "S%lu:s -> S%" PRIu32 ":n [ style = bold ];",
block,
op->index);
+ if (opt->comments) {
+ fprintf(f, " /* fallthrough */");
+ }
fprintf(f, "\n");
}
@@ -305,11 +316,14 @@ fsm_print_edges(FILE *f, const struct fsm_options *opt, const struct dfavm_op_ir
} else {
/* relative branch within the same block, entry on the east */
/* XXX: would like to make these edges shorter, but I don't know how */
- fprintf(f, "S%lu:b%" PRIu32 ":e -> S%lu:b%" PRIu32 ":e [ constraint = false ]; /* relative */",
+ fprintf(f, "S%lu:b%" PRIu32 ":e -> S%lu:b%" PRIu32 ":e [ constraint = false ];",
block,
op->index,
block,
op->u.br.dest_arg->index);
+ if (opt->comments) {
+ fprintf(f, " /* relative */");
+ }
}
fprintf(f, "\n");
@@ -321,10 +335,12 @@ static int
fsm_print_vmdotfrag(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops)
{
assert(f != NULL);
assert(opt != NULL);
+ assert(retlist != NULL);
if (-1 == fsm_print_nodes(f, opt, hooks, ops)) {
return -1;
@@ -340,14 +356,16 @@ int
fsm_print_vmdot(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops)
{
assert(f != NULL);
assert(opt != NULL);
assert(hooks != NULL);
+ assert(retlist != NULL);
if (opt->fragment) {
- if (-1 == fsm_print_vmdotfrag(f, opt, hooks, ops)) {
+ if (-1 == fsm_print_vmdotfrag(f, opt, hooks, retlist, ops)) {
return -1;
}
} else {
@@ -365,7 +383,7 @@ fsm_print_vmdot(FILE *f,
fprintf(f, "\tstart [ shape = none, label = \"\" ];\n");
fprintf(f, "\tstart -> S0:i0:w [ style = bold ];\n");
- if (-1 == fsm_print_vmdotfrag(f, opt, hooks, ops)) {
+ if (-1 == fsm_print_vmdotfrag(f, opt, hooks, retlist, ops)) {
return -1;
}
diff --git a/src/libfsm/print/vmops.c b/src/libfsm/print/vmops.c
index 7eff8d4e9..d95263062 100644
--- a/src/libfsm/print/vmops.c
+++ b/src/libfsm/print/vmops.c
@@ -25,14 +25,9 @@
#include "libfsm/internal.h"
#include "libfsm/print.h"
+#include "libfsm/vm/retlist.h"
#include "libfsm/vm/vm.h"
-enum vmops_dialect {
- VMOPS_C,
- VMOPS_H,
- VMOPS_MAIN,
-};
-
static const char *
cmp_operator(int cmp)
{
@@ -56,6 +51,7 @@ default_accept(FILE *f, const struct fsm_options *opt,
void *lang_opaque, void *hook_opaque)
{
const char *prefix;
+ size_t i;
assert(f != NULL);
assert(opt != NULL);
@@ -63,13 +59,21 @@ default_accept(FILE *f, const struct fsm_options *opt,
(void) hook_opaque;
- prefix = lang_opaque;
+ if (opt->prefix != NULL) {
+ prefix = opt->prefix;
+ } else {
+ prefix = "fsm_";
+ }
- // TODO: print ids
(void) ids;
(void) count;
fprintf(f, "%sactionRET, 1", prefix);
+ if (opt->ambig != AMBIG_NONE) {
+ i = * (const size_t *) lang_opaque;
+
+ fprintf(f, ", %zu", i);
+ }
return 0;
}
@@ -82,13 +86,21 @@ default_reject(FILE *f, const struct fsm_options *opt,
assert(f != NULL);
assert(opt != NULL);
- assert(lang_opaque != NULL);
+ assert(lang_opaque == NULL);
+ (void) lang_opaque;
(void) hook_opaque;
- prefix = lang_opaque;
+ if (opt->prefix != NULL) {
+ prefix = opt->prefix;
+ } else {
+ prefix = "fsm_";
+ }
fprintf(f, "%sactionRET, 0", prefix);
+ if (opt->ambig != AMBIG_NONE) {
+ fprintf(f, ", 0");
+ }
return 0;
}
@@ -96,7 +108,9 @@ default_reject(FILE *f, const struct fsm_options *opt,
static int
print_label(FILE *f, const struct dfavm_op_ir *op, const struct fsm_options *opt)
{
- fprintf(f, "\t\t/* l%" PRIu32 " */\n", op->index);
+ if (opt->comments) {
+ fprintf(f, "\t\t/* l%" PRIu32 " */\n", op->index);
+ }
if (op->example != NULL) {
fprintf(f, "\t\t/* e.g. \"");
@@ -113,7 +127,7 @@ print_label(FILE *f, const struct dfavm_op_ir *op, const struct fsm_options *opt
static int
print_cond(FILE *f, const struct dfavm_op_ir *op, const struct fsm_options *opt, const char *prefix)
{
- fprintf(f, "\t\t{%s%s, ", prefix, cmp_operator(op->cmp));
+ fprintf(f, "%s%s, ", prefix, cmp_operator(op->cmp));
if (-1 == c_escputcharlit(f, opt, op->cmp_arg)) {
return -1;
}
@@ -126,23 +140,26 @@ static int
print_end(FILE *f, const struct dfavm_op_ir *op,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
- const char *prefix,
+ const struct ret_list *retlist,
enum dfavm_op_end end_bits)
{
+ size_t i;
+
switch (end_bits) {
case VM_END_FAIL:
- if (-1 == print_hook_reject(f, opt, hooks, default_reject,
- (void *) prefix))
- {
+ if (-1 == print_hook_reject(f, opt, hooks, default_reject, NULL)) {
return -1;
}
break;
case VM_END_SUCC:
+ assert(op->ret >= retlist->a);
+
+ i = op->ret - retlist->a;
+
if (-1 == print_hook_accept(f, opt, hooks,
- op->endids.ids, op->endids.count,
- default_accept,
- (void *) prefix))
+ op->ret->ids, op->ret->count,
+ default_accept, &i))
{
return -1;
}
@@ -153,15 +170,16 @@ print_end(FILE *f, const struct dfavm_op_ir *op,
abort();
}
- fprintf(f, "},\n");
-
return 0;
}
static int
-print_branch(FILE *f, const struct dfavm_op_ir *op, const char *prefix)
+print_branch(FILE *f, const struct fsm_options *opt, const struct dfavm_op_ir *op, const char *prefix)
{
- fprintf(f, "%sactionGOTO, %" PRIu32 "},\n", prefix, op->u.br.dest_arg->index);
+ fprintf(f, "%sactionGOTO, %" PRIu32, prefix, op->u.br.dest_arg->index);
+ if (opt->ambig != AMBIG_NONE) {
+ fprintf(f, ", 0");
+ }
return 0;
}
@@ -170,8 +188,9 @@ static int
print_fetch(FILE *f, const struct fsm_options *opt, const char *prefix)
{
- fprintf(f, "\t\t{%sopEOF, 0, ", prefix);
+ fprintf(f, "%sopEOF, 0, ", prefix);
switch (opt->io) {
+ case FSM_IO_GETC:
case FSM_IO_STR:
case FSM_IO_PAIR:
break;
@@ -182,18 +201,65 @@ print_fetch(FILE *f, const struct fsm_options *opt, const char *prefix)
return 0;
}
-/* TODO: eventually to be non-static */
-static int
-fsm_print_vmopsfrag(FILE *f,
+static void
+print_ret(FILE *f, const unsigned *ids, size_t count)
+{
+ size_t i;
+
+ fprintf(f, "{ (const unsigned []) { ");
+ for (i = 0; i < count; i++) {
+ fprintf(f, "%u", ids[i]);
+ if (i + 1 < count) {
+ fprintf(f, ", ");
+ }
+ }
+ fprintf(f, " }, %zu }", count);
+}
+
+int
+fsm_print_vmops_c(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
- struct dfavm_op_ir *ops,
- const char *prefix)
+ const struct ret_list *retlist,
+ struct dfavm_op_ir *ops)
{
- struct dfavm_op_ir *op;
+ const struct dfavm_op_ir *op;
+ const char *prefix;
assert(f != NULL);
assert(opt != NULL);
+ assert(hooks != NULL);
+
+ if (opt->fragment) {
+ errno = ENOTSUP;
+ return -1;
+ }
+
+ if (opt->prefix != NULL) {
+ prefix = opt->prefix;
+ } else {
+ prefix = "fsm_";
+ }
+
+ fprintf(f, "#include <stdint.h>\n\n");
+ fprintf(f, "#ifndef %sLIBFSM_VMOPS_H\n", prefix);
+ fprintf(f, "#include \"%svmops.h\"\n", prefix);
+ fprintf(f, "#endif /* %sLIBFSM_VMOPS_H */\n", prefix);
+ fprintf(f, "\n");
+
+ if (opt->ambig != AMBIG_NONE) {
+ fprintf(f, "struct %sret %sRet[] = {\n", prefix, prefix);
+ for (size_t i = 0; i < retlist->count; i++) {
+ fprintf(f, "\t");
+ print_ret(f, retlist->a[i].ids, retlist->a[i].count);
+ fprintf(f, ",\n");
+ }
+ fprintf(f, "};\n");
+ fprintf(f, "const size_t %sRet_count = sizeof %sRet / sizeof *%sRet;\n", prefix, prefix, prefix);
+ fprintf(f, "\n");
+ }
+
+ fprintf(f, "struct %sop %sOps[] = {\n", prefix, prefix);
for (op = ops; op != NULL; op = op->next) {
if (op->num_incoming > 0) {
@@ -201,12 +267,15 @@ fsm_print_vmopsfrag(FILE *f,
return -1;
}
}
+
+ fprintf(f, "\t{");
+
switch (op->instr) {
case VM_OP_STOP:
if (-1 == print_cond(f, op, opt, prefix)) {
return -1;
}
- if (-1 == print_end(f, op, opt, hooks, prefix, op->u.stop.end_bits)) {
+ if (-1 == print_end(f, op, opt, hooks, retlist, op->u.stop.end_bits)) {
return -1;
}
break;
@@ -215,7 +284,7 @@ fsm_print_vmopsfrag(FILE *f,
if (-1 == print_fetch(f, opt, prefix)) {
return -1;
}
- if (-1 == print_end(f, op, opt, hooks, prefix, op->u.fetch.end_bits)) {
+ if (-1 == print_end(f, op, opt, hooks, retlist, op->u.fetch.end_bits)) {
return -1;
}
break;
@@ -224,7 +293,7 @@ fsm_print_vmopsfrag(FILE *f,
if (-1 == print_cond(f, op, opt, prefix)) {
return -1;
}
- if (-1 == print_branch(f, op, prefix)) {
+ if (-1 == print_branch(f, opt, op, prefix)) {
return -1;
}
break;
@@ -233,17 +302,31 @@ fsm_print_vmopsfrag(FILE *f,
assert(!"unreached");
break;
}
+
+ fprintf(f, "},");
+
+ if (op->instr == VM_OP_STOP && op->u.stop.end_bits == VM_END_SUCC) {
+ if (-1 == print_hook_comment(f, opt, hooks,
+ op->ret->ids, op->ret->count))
+ {
+ return -1;
+ }
+ }
+
+ fprintf(f, "\n");
}
+ fprintf(f, "\t};\n");
+
return 0;
}
int
-fsm_print_vmops(FILE *f,
+fsm_print_vmops_h(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
- struct dfavm_op_ir *ops,
- enum vmops_dialect dialect)
+ const struct ret_list *retlist,
+ struct dfavm_op_ir *ops)
{
const char *prefix;
@@ -251,204 +334,240 @@ fsm_print_vmops(FILE *f,
assert(opt != NULL);
assert(hooks != NULL);
+ (void) retlist;
+ (void) ops;
+
+ if (opt->fragment) {
+ errno = ENOTSUP;
+ return -1;
+ }
+
if (opt->prefix != NULL) {
prefix = opt->prefix;
} else {
prefix = "fsm_";
}
- if (opt->fragment) {
- if (dialect == VMOPS_C) {
- if (-1 == fsm_print_vmopsfrag(f, opt, hooks, ops, prefix)) {
- return -1;
- }
- }
- } else {
- switch (dialect) {
- case VMOPS_C:
- fprintf(f, "#include <stdint.h>\n\n");
- fprintf(f, "#ifndef %sLIBFSM_VMOPS_H\n", prefix);
- fprintf(f, "#include \"%svmops.h\"\n", prefix);
- fprintf(f, "#endif /* %sLIBFSM_VMOPS_H */\n", prefix);
- fprintf(f, "struct %sop %sOps[] = {\n", prefix, prefix);
- if (-1 == fsm_print_vmopsfrag(f, opt, hooks, ops, prefix)) {
- return -1;
- }
- fprintf(f, "\t};\n");
- break;
-
- case VMOPS_H:
- fprintf(f, "#ifndef %sLIBFSM_VMOPS_H\n", prefix);
- fprintf(f, "#define %sLIBFSM_VMOPS_H\n", prefix);
- fprintf(f, "#include <stdint.h>\n\n");
- fprintf(f, "enum %svmOp { %sopEOF, %sopLT, %sopLE, %sopEQ, %sopNE, %sopGE, %sopGT, %sopALWAYS};\n",
- prefix, prefix, prefix, prefix, prefix, prefix, prefix, prefix, prefix);
- fprintf(f, "enum %sactionOp { %sactionRET, %sactionGOTO };\n", prefix, prefix, prefix);
- fprintf(f, "struct %sop { enum %svmOp op; unsigned char c; enum %sactionOp action; int32_t arg; };\n\n",
- prefix, prefix, prefix);
- fprintf(f, "#endif /* %sLIBFSM_VMOPS_H */\n", prefix);
- break;
-
- case VMOPS_MAIN:
- fprintf(f, "#include <stdio.h>\n");
- fprintf(f, "#include <stdlib.h>\n");
- fprintf(f, "#include <string.h>\n\n");
- fprintf(f, "#ifndef %sLIBFSM_VMOPS_H\n", prefix);
- fprintf(f, "#include \"%svmops.h\"\n", prefix);
- fprintf(f, "#endif /* %sLIBFSM_VMOPS_H */\n", prefix);
- fprintf(f, "extern struct %sop %sOps[];\n", prefix, prefix);
- fprintf(f, "\n");
-
- switch (opt->io) {
- case FSM_IO_PAIR:
- fprintf(f, "int %smatch(const char *b, const char *e)\n", prefix);
- break;
-
- case FSM_IO_STR:
- fprintf(f, "int %smatch(const char *s)\n", prefix);
- break;
-
- case FSM_IO_GETC:
- errno = ENOTSUP;
- return -1;
- }
- fprintf(f, "{\n");
- fprintf(f, "\tunsigned int i = 0;\n");
- fprintf(f, "\t/* The compiler doesn't know the op stream will have fetch before the first comparison. */\n");
- fprintf(f, "\t/* Initialize to zero to prevent maybe-uninitialized warning. */\n");
- fprintf(f, "\tunsigned char c = 0;\n");
- fprintf(f, "\tint ok;\n");
- fprintf(f, "\tstruct %sop *ops = %sOps;\n", prefix, prefix);
-
- switch (opt->io) {
- case FSM_IO_PAIR:
- fprintf(f, "\tconst char *p = b;\n");
- break;
-
- case FSM_IO_STR:
- fprintf(f, "\tconst char *p = s;\n");
- break;
-
- case FSM_IO_GETC:
- errno = ENOTSUP;
- return -1;
- }
-
- fprintf(f, "\n");
- fprintf(f, "\tfor (;;) {\n");
- fprintf(f, "\t\tok = 0;\n");
- fprintf(f, "\t\tswitch (ops[i].op) {\n");
- fprintf(f, "\t\tcase %sopEOF:\n", prefix);
-
- switch (opt->io) {
- case FSM_IO_PAIR:
- fprintf(f, "\t\t\tif (p < e) {\n");
- fprintf(f, "\t\t\t\t/* not at EOF */\n");
- fprintf(f, "\t\t\t\tc = *p++;\n");
- fprintf(f, "\t\t\t\ti++;\n");
- fprintf(f, "\t\t\t\tcontinue;\n");
- fprintf(f, "\t\t\t}\n");
- break;
-
- case FSM_IO_STR:
- fprintf(f, "\t\t\tc = *p++;\n");
- fprintf(f, "\t\t\tif (c != '\\0') {\n");
- fprintf(f, "\t\t\t\t/* not at EOF */\n");
- fprintf(f, "\t\t\t\ti++;\n");
- fprintf(f, "\t\t\t\tcontinue;\n");
- fprintf(f, "\t\t\t}\n");
- break;
-
- case FSM_IO_GETC:
- errno = ENOTSUP;
- return -1;
- }
-
- fprintf(f, "\t\t\tok = 1;\n");
- fprintf(f, "\t\t\tbreak;\n");
- fprintf(f, "\t\tcase %sopLT: ok = c < ops[i].c; break;\n", prefix);
- fprintf(f, "\t\tcase %sopLE: ok = c <= ops[i].c; break;\n", prefix);
- fprintf(f, "\t\tcase %sopEQ: ok = c == ops[i].c; break;\n", prefix);
- fprintf(f, "\t\tcase %sopNE: ok = c != ops[i].c; break;\n", prefix);
- fprintf(f, "\t\tcase %sopGE: ok = c >= ops[i].c; break;\n", prefix);
- fprintf(f, "\t\tcase %sopGT: ok = c > ops[i].c; break;\n", prefix);
- fprintf(f, "\t\tcase %sopALWAYS: ok = 1; break;\n", prefix);
- fprintf(f, "\t\t}\n");
- fprintf(f, "\t\tif (ok) {\n");
- fprintf(f, "\t\t\tif (ops[i].action == %sactionRET) {\n", prefix);
- fprintf(f, "\t\t\t\treturn (int) (ops[i].arg);\n");
- fprintf(f, "\t\t\t}\n");
- fprintf(f, "\t\t\ti = ops[i].arg;\n");
- fprintf(f, "\t\t\tcontinue;\n");
- fprintf(f, "\t\t}\n");
- fprintf(f, "\t\ti++;\n");
- fprintf(f, "\t}\n");
- fprintf(f, "}\n");
- fprintf(f, "\n");
- fprintf(f, "#define %sBUFFER_SIZE (1024)\n", prefix);
- fprintf(f, "\n");
- fprintf(f, "int main(void)\n");
- fprintf(f, "{\n");
- fprintf(f, "\tchar *buf, *p;\n");
- fprintf(f, "\tint r;\n");
- fprintf(f, "\n");
- fprintf(f, "\tbuf = malloc(%sBUFFER_SIZE);\n", prefix);
- fprintf(f, "\tif (!buf) {\n");
- fprintf(f, "\t\tperror(\"malloc\");\n");
- fprintf(f, "\t\texit(1);\n");
- fprintf(f, "\t}\n\n");
- fprintf(f, "\tfor (;;) {\n");
- fprintf(f, "\t\tp = fgets(buf, %sBUFFER_SIZE, stdin);\n", prefix);
- fprintf(f, "\t\tif (!p) {\n");
- fprintf(f, "\t\t\tbreak;\n");
- fprintf(f, "\t\t}\n");
-
- switch (opt->io) {
- case FSM_IO_PAIR:
- fprintf(f, "\t\tr = %smatch(p, p + strlen(p));\n", prefix);
- break;
- case FSM_IO_STR:
- fprintf(f, "\t\tr = %smatch(p);\n", prefix);
- break;
- case FSM_IO_GETC:
- errno = ENOTSUP;
- return -1;
- }
- fprintf(f, "\t\tprintf(\"%%smatch\\n\", r ? \"no \" : \"\");\n");
- fprintf(f, "\t}\n");
- fprintf(f, "\treturn 0;\n");
- fprintf(f, "}\n");
- break;
- }
+ fprintf(f, "#ifndef %sLIBFSM_VMOPS_H\n", prefix);
+ fprintf(f, "#define %sLIBFSM_VMOPS_H\n", prefix);
+ fprintf(f, "#include <stdint.h>\n\n");
+ fprintf(f, "#include <stddef.h>\n\n");
+ fprintf(f, "enum %svmOp { %sopEOF, %sopLT, %sopLE, %sopEQ, %sopNE, %sopGE, %sopGT, %sopALWAYS};\n",
+ prefix, prefix, prefix, prefix, prefix, prefix, prefix, prefix, prefix);
+ fprintf(f, "enum %sactionOp { %sactionRET, %sactionGOTO };\n", prefix, prefix, prefix);
+ if (opt->ambig != AMBIG_NONE) {
+ fprintf(f, "struct %sret { const unsigned *ids; size_t count; };\n\n", prefix);
+ }
+ fprintf(f, "struct %sop { enum %svmOp op; unsigned char c; enum %sactionOp action; int32_t arg; int32_t ret; };\n\n",
+ prefix, prefix, prefix);
+ if (opt->ambig != AMBIG_NONE) {
+ fprintf(f, "extern struct %sret %sRet[];\n", prefix, prefix);
+ fprintf(f, "extern const size_t %sRet_count;\n", prefix);
}
+ fprintf(f, "#endif /* %sLIBFSM_VMOPS_H */\n", prefix);
return 0;
}
int
-fsm_print_vmops_c(FILE *f,
+fsm_print_vmops_main(FILE *f,
const struct fsm_options *opt,
const struct fsm_hooks *hooks,
+ const struct ret_list *retlist,
struct dfavm_op_ir *ops)
{
- return fsm_print_vmops(f, opt, hooks, ops, VMOPS_C);
-}
+ const char *prefix;
-int
-fsm_print_vmops_h(FILE *f,
- const struct fsm_options *opt,
- const struct fsm_hooks *hooks,
- struct dfavm_op_ir *ops)
-{
- return fsm_print_vmops(f, opt, hooks, ops, VMOPS_H);
-}
+ assert(f != NULL);
+ assert(opt != NULL);
+ assert(hooks != NULL);
-int
-fsm_print_vmops_main(FILE *f,
- const struct fsm_options *opt,
- const struct fsm_hooks *hooks,
- struct dfavm_op_ir *ops)
-{
- return fsm_print_vmops(f, opt, hooks, ops, VMOPS_MAIN);
+ (void) retlist;
+ (void) ops;
+
+ if (opt->fragment) {
+ errno = ENOTSUP;
+ return -1;
+ }
+
+ if (opt->prefix != NULL) {
+ prefix = opt->prefix;
+ } else {
+ prefix = "fsm_";
+ }
+
+ fprintf(f, "#include <stdio.h>\n");
+ fprintf(f, "#include <stdlib.h>\n");
+ fprintf(f, "#include <string.h>\n\n");
+ fprintf(f, "#ifndef %sLIBFSM_VMOPS_H\n", prefix);
+ fprintf(f, "#include \"%svmops.h\"\n", prefix);
+ fprintf(f, "#endif /* %sLIBFSM_VMOPS_H */\n", prefix);
+ fprintf(f, "extern struct %sop %sOps[];\n", prefix, prefix);
+ if (opt->ambig != AMBIG_NONE) {
+ fprintf(f, "extern struct %sret %sRet[];\n", prefix, prefix);
+ fprintf(f, "extern const size_t %sRet_count;\n", prefix);
+ }
+ fprintf(f, "\n");
+
+ fprintf(f, "int %smatch(", prefix);
+ switch (opt->io) {
+ case FSM_IO_PAIR:
+ fprintf(f, "const char *b, const char *e");
+ break;
+
+ case FSM_IO_STR:
+ fprintf(f, "const char *s");
+ break;
+
+ case FSM_IO_GETC:
+ errno = ENOTSUP;
+ return -1;
+ }
+ if (opt->ambig != AMBIG_NONE) {
+ fprintf(f, ",\n");
+ fprintf(f, "\tconst unsigned **ids, size_t *count");
+ }
+ fprintf(f, ")\n");
+
+ fprintf(f, "{\n");
+ fprintf(f, "\tunsigned int i = 0;\n");
+ if (opt->comments) {
+ fprintf(f, "\t/* The compiler doesn't know the op stream will have fetch before the first comparison. */\n");
+ fprintf(f, "\t/* Initialize to zero to prevent maybe-uninitialized warning. */\n");
+ }
+ fprintf(f, "\tunsigned char c = 0;\n");
+ fprintf(f, "\tint ok;\n");
+ fprintf(f, "\tstruct %sop *ops = %sOps;\n", prefix, prefix);
+
+ switch (opt->io) {
+ case FSM_IO_PAIR:
+ fprintf(f, "\tconst char *p = b;\n");
+ break;
+
+ case FSM_IO_STR:
+ fprintf(f, "\tconst char *p = s;\n");
+ break;
+
+ case FSM_IO_GETC:
+ errno = ENOTSUP;
+ return -1;
+ }
+
+ fprintf(f, "\n");
+ fprintf(f, "\tfor (;;) {\n");
+ fprintf(f, "\t\tok = 0;\n");
+ fprintf(f, "\t\tswitch (ops[i].op) {\n");
+ fprintf(f, "\t\tcase %sopEOF:\n", prefix);
+
+ switch (opt->io) {
+ case FSM_IO_PAIR:
+ fprintf(f, "\t\t\tif (p < e) {\n");
+ if (opt->comments) {
+ fprintf(f, "\t\t\t\t/* not at EOF */\n");
+ }
+ fprintf(f, "\t\t\t\tc = *p++;\n");
+ fprintf(f, "\t\t\t\ti++;\n");
+ fprintf(f, "\t\t\t\tcontinue;\n");
+ fprintf(f, "\t\t\t}\n");
+ break;
+
+ case FSM_IO_STR:
+ fprintf(f, "\t\t\tc = *p++;\n");
+ fprintf(f, "\t\t\tif (c != '\\0') {\n");
+ if (opt->comments) {
+ fprintf(f, "\t\t\t\t/* not at EOF */\n");
+ }
+ fprintf(f, "\t\t\t\ti++;\n");
+ fprintf(f, "\t\t\t\tcontinue;\n");
+ fprintf(f, "\t\t\t}\n");
+ break;
+
+ case FSM_IO_GETC:
+ errno = ENOTSUP;
+ return -1;
+ }
+
+ fprintf(f, "\t\t\tok = 1;\n");
+ fprintf(f, "\t\t\tbreak;\n");
+ fprintf(f, "\t\tcase %sopLT: ok = c < ops[i].c; break;\n", prefix);
+ fprintf(f, "\t\tcase %sopLE: ok = c <= ops[i].c; break;\n", prefix);
+ fprintf(f, "\t\tcase %sopEQ: ok = c == ops[i].c; break;\n", prefix);
+ fprintf(f, "\t\tcase %sopNE: ok = c != ops[i].c; break;\n", prefix);
+ fprintf(f, "\t\tcase %sopGE: ok = c >= ops[i].c; break;\n", prefix);
+ fprintf(f, "\t\tcase %sopGT: ok = c > ops[i].c; break;\n", prefix);
+ fprintf(f, "\t\tcase %sopALWAYS: ok = 1; break;\n", prefix);
+ fprintf(f, "\t\t}\n");
+ fprintf(f, "\t\tif (ok) {\n");
+ fprintf(f, "\t\t\tif (ops[i].action == %sactionRET) {\n", prefix);
+ if (opt->ambig != AMBIG_NONE) {
+ fprintf(f, "\t\t\t\tif (ops[i].arg) {\n");
+ fprintf(f, "\t\t\t\t\t*ids = %sRet[ops[i].ret].ids;\n", prefix);
+ fprintf(f, "\t\t\t\t\t*count = %sRet[ops[i].ret].count;\n", prefix);
+ fprintf(f, "\t\t\t\t}\n");
+ }
+ fprintf(f, "\t\t\t\treturn (int) (ops[i].arg);\n");
+ fprintf(f, "\t\t\t}\n");
+ fprintf(f, "\t\t\ti = ops[i].arg;\n");
+ fprintf(f, "\t\t\tcontinue;\n");
+ fprintf(f, "\t\t}\n");
+ fprintf(f, "\t\ti++;\n");
+ fprintf(f, "\t}\n");
+ fprintf(f, "}\n");
+ fprintf(f, "\n");
+ fprintf(f, "#define %sBUFFER_SIZE (1024)\n", prefix);
+ fprintf(f, "\n");
+ fprintf(f, "int main(void)\n");
+ fprintf(f, "{\n");
+ fprintf(f, "\tchar *buf, *p;\n");
+ if (opt->ambig != AMBIG_NONE) {
+ fprintf(f, "\tconst unsigned *ids;\n");
+ fprintf(f, "\tsize_t count, i;\n");
+ }
+ fprintf(f, "\tint r;\n");
+ fprintf(f, "\n");
+ fprintf(f, "\tbuf = malloc(%sBUFFER_SIZE);\n", prefix);
+ fprintf(f, "\tif (!buf) {\n");
+ fprintf(f, "\t\tperror(\"malloc\");\n");
+ fprintf(f, "\t\texit(1);\n");
+ fprintf(f, "\t}\n\n");
+ fprintf(f, "\tfor (;;) {\n");
+ fprintf(f, "\t\tp = fgets(buf, %sBUFFER_SIZE, stdin);\n", prefix);
+ fprintf(f, "\t\tif (!p) {\n");
+ fprintf(f, "\t\t\tbreak;\n");
+ fprintf(f, "\t\t}\n");
+
+ fprintf(f, "\t\tr = %smatch(", prefix);
+ switch (opt->io) {
+ case FSM_IO_PAIR:
+ fprintf(f, "p, p + strlen(p)");
+ break;
+ case FSM_IO_STR:
+ fprintf(f, "p");
+ break;
+ case FSM_IO_GETC:
+ // TODO: getc from string buffer
+ errno = ENOTSUP;
+ return -1;
+ }
+ if (opt->ambig != AMBIG_NONE) {
+ fprintf(f, ", &ids, &count");
+ }
+ fprintf(f, ");\n");
+
+ fprintf(f, "\t\tprintf(\"%%smatch\\n\", r == 0 ? \"no \" : \"\");\n");
+ if (opt->ambig != AMBIG_NONE) {
+ fprintf(f, "\t\tif (r) {\n");
+ fprintf(f, "\t\t\tprintf(\"ids:\");\n");
+ fprintf(f, "\t\t\tfor (i = 0; i < count; i++) {\n");
+ fprintf(f, "\t\t\t\tprintf(\" %%u\", ids[i]);\n");
+ fprintf(f, "\t\t\t}\n");
+ fprintf(f, "\t\t\tprintf(\"\\n\");\n");
+ fprintf(f, "\t\t}\n");
+ }
+ fprintf(f, "\t}\n");
+ fprintf(f, "\treturn 0;\n");
+ fprintf(f, "}\n");
+
+ return 0;
}
diff --git a/src/libfsm/vm.c b/src/libfsm/vm.c
index c9205815f..0eb4fa16c 100644
--- a/src/libfsm/vm.c
+++ b/src/libfsm/vm.c
@@ -18,6 +18,7 @@
#include "internal.h"
#include "vm/vm.h"
+#include "vm/retlist.h"
#include "print/ir.h"
// VM state:
@@ -92,6 +93,7 @@ fsm_vm_compile_with_options(const struct fsm *fsm,
static const struct dfavm_assembler_ir zero;
struct dfavm_assembler_ir a;
struct ir *ir;
+ struct ret_list retlist;
struct fsm_dfavm *vm;
assert(fsm != NULL);
@@ -102,9 +104,15 @@ fsm_vm_compile_with_options(const struct fsm *fsm,
return NULL;
}
+ if (!build_retlist(&retlist, ir)) {
+ free_ir(fsm, ir);
+ return NULL;
+ }
+
a = zero;
- if (!dfavm_compile_ir(&a, ir, vmopts)) {
+ if (!dfavm_compile_ir(&a, ir, &retlist, vmopts)) {
+ free_retlist(&retlist);
free_ir(fsm, ir);
return NULL;
}
@@ -116,6 +124,7 @@ fsm_vm_compile_with_options(const struct fsm *fsm,
return NULL;
}
+ free_retlist(&retlist);
dfavm_opasm_finalize_op(&a);
return vm;
diff --git a/src/libfsm/vm/Makefile b/src/libfsm/vm/Makefile
index fe54811b8..ce32cc585 100644
--- a/src/libfsm/vm/Makefile
+++ b/src/libfsm/vm/Makefile
@@ -4,6 +4,7 @@ SRC += src/libfsm/vm/ir.c
SRC += src/libfsm/vm/vm.c
SRC += src/libfsm/vm/v1.c
SRC += src/libfsm/vm/v2.c
+SRC += src/libfsm/vm/retlist.c
.for src in ${SRC:Msrc/libfsm/vm/*.c}
CFLAGS.${src} += -I src # XXX: for internal.h
diff --git a/src/libfsm/vm/ir.c b/src/libfsm/vm/ir.c
index f27df508d..608871b61 100644
--- a/src/libfsm/vm/ir.c
+++ b/src/libfsm/vm/ir.c
@@ -18,6 +18,7 @@
#include "libfsm/internal.h"
+#include "retlist.h"
#include "vm.h"
#include "print/ir.h"
@@ -277,7 +278,8 @@ opasm_free_list(struct dfavm_assembler_ir *a, struct dfavm_op_ir *op)
}
static struct dfavm_op_ir *
-opasm_new(struct dfavm_assembler_ir *a, enum dfavm_op_instr instr, enum dfavm_op_cmp cmp, unsigned char arg,
+opasm_new(struct dfavm_assembler_ir *a, const struct ret_list *retlist,
+ enum dfavm_op_instr instr, enum dfavm_op_cmp cmp, unsigned char arg,
const struct ir_state *ir_state)
{
static const struct dfavm_op_ir zero;
@@ -306,20 +308,22 @@ opasm_new(struct dfavm_assembler_ir *a, enum dfavm_op_instr instr, enum dfavm_op
if (ir_state != NULL) {
op->example = ir_state->example;
- op->endids.ids = ir_state->endids.ids;
- op->endids.count = ir_state->endids.count;
+ op->ret = ir_state->isend
+ ? find_ret(retlist, ir_state->endids.ids, ir_state->endids.count)
+ : NULL;
}
return op;
}
static struct dfavm_op_ir *
-opasm_new_fetch(struct dfavm_assembler_ir *a, unsigned state, enum dfavm_op_end end,
+opasm_new_fetch(struct dfavm_assembler_ir *a, const struct ret_list *retlist,
+ unsigned state, enum dfavm_op_end end,
const struct ir_state *ir_state)
{
struct dfavm_op_ir *op;
- op = opasm_new(a, VM_OP_FETCH, VM_CMP_ALWAYS, 0, ir_state);
+ op = opasm_new(a, retlist, VM_OP_FETCH, VM_CMP_ALWAYS, 0, ir_state);
if (op == NULL) {
return NULL;
}
@@ -331,12 +335,13 @@ opasm_new_fetch(struct dfavm_assembler_ir *a, unsigned state, enum dfavm_op_end
}
static struct dfavm_op_ir *
-opasm_new_stop(struct dfavm_assembler_ir *a, enum dfavm_op_cmp cmp, unsigned char arg, enum dfavm_op_end end,
+opasm_new_stop(struct dfavm_assembler_ir *a, const struct ret_list *retlist,
+ enum dfavm_op_cmp cmp, unsigned char arg, enum dfavm_op_end end,
const struct ir_state *ir_state)
{
struct dfavm_op_ir *op;
- op = opasm_new(a, VM_OP_STOP, cmp, arg, ir_state);
+ op = opasm_new(a, retlist, VM_OP_STOP, cmp, arg, ir_state);
if (op == NULL) {
return NULL;
}
@@ -347,14 +352,15 @@ opasm_new_stop(struct dfavm_assembler_ir *a, enum dfavm_op_cmp cmp, unsigned cha
}
static struct dfavm_op_ir *
-opasm_new_branch(struct dfavm_assembler_ir *a, enum dfavm_op_cmp cmp, unsigned char arg, uint32_t dest_state,
+opasm_new_branch(struct dfavm_assembler_ir *a, const struct ret_list *retlist,
+ enum dfavm_op_cmp cmp, unsigned char arg, uint32_t dest_state,
const struct ir_state *ir_state)
{
struct dfavm_op_ir *op;
assert(dest_state < a->nstates);
- op = opasm_new(a, VM_OP_BRANCH, cmp, arg, ir_state);
+ op = opasm_new(a, retlist, VM_OP_BRANCH, cmp, arg, ir_state);
if (op == NULL) {
return NULL;
}
@@ -488,7 +494,8 @@ analyze_table(struct dfa_table *table)
}
static int
-xlate_table_ranges(struct dfavm_assembler_ir *a, struct dfa_table *table, struct dfavm_op_ir **opp)
+xlate_table_ranges(struct dfavm_assembler_ir *a, const struct ret_list *retlist,
+ struct dfa_table *table, struct dfavm_op_ir **opp)
{
int i,lo;
int count = 0;
@@ -508,8 +515,8 @@ xlate_table_ranges(struct dfavm_assembler_ir *a, struct dfa_table *table, struct
enum dfavm_op_cmp cmp = (i > lo+1) ? VM_CMP_LE : VM_CMP_EQ;
op = (dst < 0)
- ? opasm_new_stop(a, cmp, arg, VM_END_FAIL, table->ir_state)
- : opasm_new_branch(a, cmp, arg, dst, table->ir_state);
+ ? opasm_new_stop(a, retlist, cmp, arg, VM_END_FAIL, table->ir_state)
+ : opasm_new_branch(a, retlist, cmp, arg, dst, table->ir_state);
if (op == NULL) {
return -1;
@@ -526,8 +533,8 @@ xlate_table_ranges(struct dfavm_assembler_ir *a, struct dfa_table *table, struct
if (lo < FSM_SIGMA_COUNT) {
int64_t dst = table->tbl[lo];
*opp = (dst < 0)
- ? opasm_new_stop(a, VM_CMP_ALWAYS, 0, VM_END_FAIL, table->ir_state)
- : opasm_new_branch(a, VM_CMP_ALWAYS, 0, dst, table->ir_state);
+ ? opasm_new_stop(a, retlist, VM_CMP_ALWAYS, 0, VM_END_FAIL, table->ir_state)
+ : opasm_new_branch(a, retlist, VM_CMP_ALWAYS, 0, dst, table->ir_state);
if (*opp == NULL) {
return -1;
}
@@ -539,7 +546,8 @@ xlate_table_ranges(struct dfavm_assembler_ir *a, struct dfa_table *table, struct
}
static int
-xlate_table_cases(struct dfavm_assembler_ir *a, struct dfa_table *table, struct dfavm_op_ir **opp)
+xlate_table_cases(struct dfavm_assembler_ir *a, const struct ret_list *retlist,
+ struct dfa_table *table, struct dfavm_op_ir **opp)
{
int i, count = 0;
int64_t mdst = table->mode.to;
@@ -554,8 +562,8 @@ xlate_table_cases(struct dfavm_assembler_ir *a, struct dfa_table *table, struct
}
*opp = (dst < 0)
- ? opasm_new_stop(a, VM_CMP_EQ, i, VM_END_FAIL, table->ir_state)
- : opasm_new_branch(a, VM_CMP_EQ, i, dst, table->ir_state);
+ ? opasm_new_stop(a, retlist, VM_CMP_EQ, i, VM_END_FAIL, table->ir_state)
+ : opasm_new_branch(a, retlist, VM_CMP_EQ, i, dst, table->ir_state);
if (*opp == NULL) {
return -1;
}
@@ -564,8 +572,8 @@ xlate_table_cases(struct dfavm_assembler_ir *a, struct dfa_table *table, struct
}
*opp = (mdst < 0)
- ? opasm_new_stop(a, VM_CMP_ALWAYS, 0, VM_END_FAIL, table->ir_state)
- : opasm_new_branch(a, VM_CMP_ALWAYS, 0, mdst, table->ir_state);
+ ? opasm_new_stop(a, retlist, VM_CMP_ALWAYS, 0, VM_END_FAIL, table->ir_state)
+ : opasm_new_branch(a, retlist, VM_CMP_ALWAYS, 0, mdst, table->ir_state);
if (*opp == NULL) {
return -1;
}
@@ -576,7 +584,8 @@ xlate_table_cases(struct dfavm_assembler_ir *a, struct dfa_table *table, struct
}
static int
-initial_translate_table(struct dfavm_assembler_ir *a, struct dfa_table *table, struct dfavm_op_ir **opp)
+initial_translate_table(struct dfavm_assembler_ir *a, const struct ret_list *retlist,
+ struct dfa_table *table, struct dfavm_op_ir **opp)
{
int count, best_count;
struct dfavm_op_ir *op, *best_op;
@@ -604,13 +613,13 @@ initial_translate_table(struct dfavm_assembler_ir *a, struct dfa_table *table, s
assert(dst >= 0);
assert((size_t)dst < a->nstates);
- *opp = opasm_new_stop(a, VM_CMP_NE, sym, VM_END_FAIL, table->ir_state);
+ *opp = opasm_new_stop(a, retlist, VM_CMP_NE, sym, VM_END_FAIL, table->ir_state);
if (*opp == NULL) {
return -1;
}
opp = &(*opp)->next;
- *opp = opasm_new_branch(a, VM_CMP_ALWAYS, 0, dst, table->ir_state);
+ *opp = opasm_new_branch(a, retlist, VM_CMP_ALWAYS, 0, dst, table->ir_state);
if (*opp == NULL) {
return -1;
}
@@ -620,10 +629,10 @@ initial_translate_table(struct dfavm_assembler_ir *a, struct dfa_table *table, s
}
best_op = NULL;
- best_count = xlate_table_ranges(a, table, &best_op);
+ best_count = xlate_table_ranges(a, retlist, table, &best_op);
op = NULL;
- count = xlate_table_cases(a, table, &op);
+ count = xlate_table_cases(a, retlist, table, &op);
if (count < best_count) {
opasm_free_list(a,best_op);
@@ -682,7 +691,8 @@ dfa_table_init(struct dfa_table *table, long default_dest, const struct ir_state
}
static int
-initial_translate_partial(struct dfavm_assembler_ir *a, struct ir_state *st, struct dfavm_op_ir **opp)
+initial_translate_partial(struct dfavm_assembler_ir *a, const struct ret_list *retlist,
+ struct ir_state *st, struct dfavm_op_ir **opp)
{
struct dfa_table table;
size_t i, ngrps;
@@ -696,11 +706,12 @@ initial_translate_partial(struct dfavm_assembler_ir *a, struct ir_state *st, str
group_to_table(&table, &st->u.partial.groups[i]);
}
- return initial_translate_table(a, &table, opp);
+ return initial_translate_table(a, retlist, &table, opp);
}
static int
-initial_translate_dominant(struct dfavm_assembler_ir *a, struct ir_state *st, struct dfavm_op_ir **opp)
+initial_translate_dominant(struct dfavm_assembler_ir *a, const struct ret_list *retlist,
+ struct ir_state *st, struct dfavm_op_ir **opp)
{
struct dfa_table table;
size_t i, ngrps;
@@ -714,11 +725,12 @@ initial_translate_dominant(struct dfavm_assembler_ir *a, struct ir_state *st, st
group_to_table(&table, &st->u.dominant.groups[i]);
}
- return initial_translate_table(a, &table, opp);
+ return initial_translate_table(a, retlist, &table, opp);
}
static int
-initial_translate_error(struct dfavm_assembler_ir *a, struct ir_state *st, struct dfavm_op_ir **opp)
+initial_translate_error(struct dfavm_assembler_ir *a, const struct ret_list *retlist,
+ struct ir_state *st, struct dfavm_op_ir **opp)
{
struct dfa_table table;
size_t i, ngrps;
@@ -734,11 +746,13 @@ initial_translate_error(struct dfavm_assembler_ir *a, struct ir_state *st, struc
group_to_table(&table, &st->u.error.groups[i]);
}
- return initial_translate_table(a, &table, opp);
+ return initial_translate_table(a, retlist, &table, opp);
}
static struct dfavm_op_ir *
-initial_translate_state(struct dfavm_assembler_ir *a, const struct ir *ir, size_t ind)
+initial_translate_state(struct dfavm_assembler_ir *a, const struct ir *ir,
+ const struct ret_list *retlist,
+ size_t ind)
{
struct ir_state *st;
struct dfavm_op_ir **opp;
@@ -747,22 +761,22 @@ initial_translate_state(struct dfavm_assembler_ir *a, const struct ir *ir, size_
opp = &a->ops[ind];
if (st->isend && st->strategy == IR_SAME && st->u.same.to == ind) {
- *opp = opasm_new_stop(a, VM_CMP_ALWAYS, 0, VM_END_SUCC, st);
+ *opp = opasm_new_stop(a, retlist, VM_CMP_ALWAYS, 0, VM_END_SUCC, st);
return a->ops[ind];
}
- *opp = opasm_new_fetch(a, ind, (st->isend) ? VM_END_SUCC : VM_END_FAIL, st);
+ *opp = opasm_new_fetch(a, retlist, ind, (st->isend) ? VM_END_SUCC : VM_END_FAIL, st);
opp = &(*opp)->next;
assert(*opp == NULL);
switch (st->strategy) {
case IR_NONE:
- *opp = opasm_new_stop(a, VM_CMP_ALWAYS, 0, VM_END_FAIL, st);
+ *opp = opasm_new_stop(a, retlist, VM_CMP_ALWAYS, 0, VM_END_FAIL, st);
opp = &(*opp)->next;
break;
case IR_SAME:
- *opp = opasm_new_branch(a, VM_CMP_ALWAYS, 0, st->u.same.to, st);
+ *opp = opasm_new_branch(a, retlist, VM_CMP_ALWAYS, 0, st->u.same.to, st);
opp = &(*opp)->next;
break;
@@ -774,19 +788,19 @@ initial_translate_state(struct dfavm_assembler_ir *a, const struct ir *ir, size_
* intelligently.
*/
case IR_PARTIAL:
- if (initial_translate_partial(a, st, opp) < 0) {
+ if (initial_translate_partial(a, retlist, st, opp) < 0) {
return NULL;
}
break;
case IR_DOMINANT:
- if (initial_translate_dominant(a, st, opp) < 0) {
+ if (initial_translate_dominant(a, retlist, st, opp) < 0) {
return NULL;
}
break;
case IR_ERROR:
- if (initial_translate_error(a, st, opp) < 0) {
+ if (initial_translate_error(a, retlist, st, opp) < 0) {
return NULL;
}
break;
@@ -804,20 +818,6 @@ initial_translate_state(struct dfavm_assembler_ir *a, const struct ir *ir, size_
return a->ops[ind];
}
-static int
-initial_translate(const struct ir *ir, struct dfavm_assembler_ir *a)
-{
- size_t i,n;
-
- n = a->nstates;
-
- for (i=0; i < n; i++) {
- a->ops[i] = initial_translate_state(a, ir, i);
- }
-
- return 0;
-}
-
static void
fixup_dests(struct dfavm_assembler_ir *a)
{
@@ -1063,8 +1063,15 @@ print_all_states(struct dfavm_assembler_ir *a)
}
int
-dfavm_compile_ir(struct dfavm_assembler_ir *a, const struct ir *ir, struct fsm_vm_compile_opts opts)
+dfavm_compile_ir(struct dfavm_assembler_ir *a, const struct ir *ir, const struct ret_list *retlist,
+ struct fsm_vm_compile_opts opts)
{
+ size_t i;
+
+ assert(a != NULL);
+ assert(ir != NULL);
+ assert(retlist != NULL);
+
a->nstates = ir->n;
a->start = ir->start;
@@ -1080,8 +1087,8 @@ dfavm_compile_ir(struct dfavm_assembler_ir *a, const struct ir *ir, struct fsm_v
return 0;
}
- if (initial_translate(ir, a) < 0) {
- return 0;
+ for (i=0; i < a->nstates; i++) {
+ a->ops[i] = initial_translate_state(a, ir, retlist, i);
}
fixup_dests(a);
@@ -1121,7 +1128,7 @@ dfavm_compile_ir(struct dfavm_assembler_ir *a, const struct ir *ir, struct fsm_v
return 0;
}
- a->ops[0] = opasm_new_stop(a, VM_CMP_ALWAYS, 0, VM_END_FAIL, NULL);
+ a->ops[0] = opasm_new_stop(a, retlist, VM_CMP_ALWAYS, 0, VM_END_FAIL, NULL);
if (a->ops[0] == NULL) {
return -1;
}
diff --git a/src/libfsm/vm/retlist.c b/src/libfsm/vm/retlist.c
new file mode 100644
index 000000000..97d4eb3f0
--- /dev/null
+++ b/src/libfsm/vm/retlist.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2008-2024 Katherine Flavel
+ *
+ * See LICENCE for the full copyright terms.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "libfsm/internal.h"
+
+#include "libfsm/print/ir.h"
+#include "libfsm/vm/retlist.h"
+
+/*
+ * Append one (ids, count) pair to the end of the list, growing the backing
+ * array when it is full.
+ *
+ * The capacity is not stored anywhere; it is implied by list->count. The
+ * array starts out with `low` slots, and is enlarged each time count reaches
+ * a power of two that is at least `low`, which is exactly when the current
+ * allocation has been used up.
+ *
+ * The ids pointer is stored as given, not copied.
+ *
+ * Returns true on success. Returns false when memory could not be obtained,
+ * or when the enlarged array size would not fit in a size_t (errno is set to
+ * ENOMEM for the latter). On a failed enlargement the existing array and
+ * list->count are left as they were; on a failed first allocation list->a
+ * is NULL.
+ */
+static bool
+append_ret(struct ret_list *list,
+	const fsm_end_id_t *ids, size_t count)
+{
+	const size_t low    = 16; /* initial capacity; must be a power of 2 */
+	const size_t factor = 2;  /* must be at least 2, so that the array lasts
+	                           * until count next reaches a power of 2 */
+
+	assert(list != NULL);
+
+	// TODO: alloc callbacks
+	if (list->count == 0) {
+		list->a = malloc(low * sizeof *list->a);
+		if (list->a == NULL) {
+			return false;
+		}
+	} else if (list->count >= low && (list->count & (list->count - 1)) == 0) {
+		/* largest element count whose size in bytes still fits in a size_t */
+		const size_t limit = (size_t) -1 / sizeof *list->a;
+		size_t newcap;
+		void *tmp;
+
+		/*
+		 * Guard both the element count and the byte size against
+		 * wrapping, and report it to the caller rather than
+		 * terminating the process from inside the library.
+		 */
+		if (list->count > limit / factor) {
+			errno = ENOMEM;
+			return false;
+		}
+
+		newcap = list->count * factor;
+
+		/* go via tmp so the old array is not lost if realloc fails */
+		tmp = realloc(list->a, newcap * sizeof *list->a);
+		if (tmp == NULL) {
+			return false;
+		}
+
+		list->a = tmp;
+	}
+
+	list->a[list->count].ids   = ids;
+	list->a[list->count].count = count;
+
+	list->count++;
+
+	return true;
+}
+
+/*
+ * Comparison callback for qsort() and bsearch() over struct ret.
+ *
+ * Entries with fewer ids order before entries with more. Entries with the
+ * same number of ids are ordered by memcmp() over the id arrays. Two empty
+ * entries compare equal, and their ids pointers are not looked at.
+ */
+static int
+cmp_ret(const void *pa, const void *pb)
+{
+	const struct ret *lhs = pa;
+	const struct ret *rhs = pb;
+
+	if (lhs->count != rhs->count) {
+		return lhs->count < rhs->count ? -1 : +1;
+	}
+
+	if (lhs->count > 0) {
+		assert(lhs->ids != NULL);
+		assert(rhs->ids != NULL);
+
+		return memcmp(lhs->ids, rhs->ids, lhs->count * sizeof *lhs->ids);
+	}
+
+	return 0;
+}
+
+/*
+ * Look up the list entry whose end ids equal the given (ids, count) pair.
+ *
+ * The list is searched with bsearch() using cmp_ret, so it must already be
+ * in cmp_ret order; build_retlist() leaves it that way.
+ *
+ * Returns a pointer into list->a, or NULL when there is no matching entry.
+ * An empty list never matches; list->a is not read in that case.
+ */
+struct ret *
+find_ret(const struct ret_list *list,
+	const fsm_end_id_t *ids, size_t count)
+{
+	struct ret key;
+
+	assert(list != NULL);
+
+	/* nothing to search, and list->a may not point at anything */
+	if (list->count == 0) {
+		return NULL;
+	}
+
+	key.count = count;
+	key.ids   = ids;
+
+	return bsearch(&key, list->a, list->count, sizeof *list->a, cmp_ret);
+}
+
+/*
+ * Populate list with one entry per distinct set of end ids found on the end
+ * states of ir.
+ *
+ * Every state with isend set contributes its endids; the entries are then
+ * sorted with cmp_ret and runs of equal entries are collapsed to one, so
+ * the result is suitable for find_ret().
+ *
+ * The entries point at the id arrays held by the ir states; the ids are not
+ * copied.
+ *
+ * Returns true on success, after which the caller releases the list with
+ * free_retlist(). Returns false if an entry could not be appended; in that
+ * case anything allocated so far has been freed and the list is left empty
+ * (count 0, a NULL), so there is nothing for the caller to release.
+ */
+bool
+build_retlist(struct ret_list *list, const struct ir *ir)
+{
+	size_t i, j;
+
+	assert(list != NULL);
+	assert(ir != NULL);
+
+	list->count = 0;
+	list->a     = NULL;
+
+	for (i = 0; i < ir->n; i++) {
+		if (!ir->states[i].isend) {
+			continue;
+		}
+
+		if (!append_ret(list, ir->states[i].endids.ids, ir->states[i].endids.count)) {
+			goto error;
+		}
+	}
+
+	if (list->count == 0) {
+		return true;
+	}
+
+	/* sort for both dedup and bsearch */
+	qsort(list->a, list->count, sizeof *list->a, cmp_ret);
+
+	/*
+	 * Deduplicate based on endids only. j is the last entry kept so far;
+	 * i walks ahead, and each entry that differs from a[j] is the start
+	 * of a new run and is moved down to follow it.
+	 */
+	j = 0;
+	for (i = 1; i < list->count; i++) {
+		assert(i > j);
+
+		if (cmp_ret(&list->a[j], &list->a[i]) == 0) {
+			continue;
+		}
+
+		j++;
+		list->a[j] = list->a[i];
+	}
+
+	list->count = j + 1;
+
+	assert(list->count > 0);
+
+	return true;
+
+error:
+
+	/* list->a is either NULL or the array built so far */
+	free(list->a);
+
+	list->a     = NULL;
+	list->count = 0;
+
+	return false;
+}
+
+/*
+ * Release the array of entries held by the list.
+ *
+ * Only list->a itself is freed; the id arrays that the entries point at are
+ * left alone. For an empty list (count of 0) nothing is done, and list->a
+ * is not read.
+ */
+void
+free_retlist(struct ret_list *list)
+{
+	if (list->count == 0) {
+		return;
+	}
+
+	free(list->a);
+}
+
diff --git a/src/libfsm/vm/retlist.h b/src/libfsm/vm/retlist.h
new file mode 100644
index 000000000..5e67218b9
--- /dev/null
+++ b/src/libfsm/vm/retlist.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2024 Katherine Flavel
+ *
+ * See LICENCE for the full copyright terms.
+ */
+
+#ifndef FSM_INTERNAL_RETLIST_H
+#define FSM_INTERNAL_RETLIST_H
+
+struct ir;
+
+struct ret { /* one set of end ids, as stored in a struct ret_list */
+ size_t count; /* number of elements in ids; may be 0 */
+ const fsm_end_id_t *ids; /* the end ids; build_retlist() stores the IR state's pointer here without copying */
+};
+
+struct ret_list { /* array of struct ret, filled in by build_retlist() */
+ size_t count; /* number of entries in a */
+ struct ret *a; /* heap-allocated entries, sorted and deduplicated by build_retlist(); only meaningful when count > 0 */
+};
+
+struct ret *
+find_ret(const struct ret_list *list, const fsm_end_id_t *ids, size_t count);
+
+bool
+build_retlist(struct ret_list *list, const struct ir *ir);
+
+void
+free_retlist(struct ret_list *list);
+
+#endif
+
diff --git a/src/libfsm/vm/vm.h b/src/libfsm/vm/vm.h
index bfb28a774..c96bfa84b 100644
--- a/src/libfsm/vm/vm.h
+++ b/src/libfsm/vm/vm.h
@@ -20,6 +20,8 @@
#define DFAVM_MAGIC "DFAVM$"
struct ir;
+struct ret;
+struct ret_list;
struct fsm_vm_compile_opts;
struct dfavm_op_ir_pool;
@@ -74,15 +76,8 @@ struct dfavm_op_ir {
*/
uint32_t index;
-
-
-const char *example;
-
-struct {
- fsm_end_id_t *ids; /* NULL -> 0 */
- size_t count;
-} endids;
-
+ const char *example;
+ const struct ret *ret;
uint32_t num_incoming; // number of branches to this instruction
int in_trace;
@@ -198,7 +193,7 @@ const char *
cmp_name(int cmp);
int
-dfavm_compile_ir(struct dfavm_assembler_ir *a, const struct ir *ir, struct fsm_vm_compile_opts opts);
+dfavm_compile_ir(struct dfavm_assembler_ir *a, const struct ir *ir, const struct ret_list *retlist, struct fsm_vm_compile_opts opts);
struct fsm_dfavm *
dfavm_compile_vm(const struct dfavm_assembler_ir *a, struct fsm_vm_compile_opts opts);
diff --git a/src/re/main.c b/src/re/main.c
index 86258f318..53abe2d52 100644
--- a/src/re/main.c
+++ b/src/re/main.c
@@ -334,30 +334,19 @@ conflict(FILE *f, const struct fsm_options *opt,
}
static int
-accept_c(FILE *f, const struct fsm_options *opt,
+comment_c(FILE *f, const struct fsm_options *opt,
const fsm_end_id_t *ids, size_t count,
- void *lang_opaque, void *hook_opaque)
+ void *hook_opaque)
{
- unsigned n;
size_t i;
assert(opt != NULL);
- assert(lang_opaque == NULL);
assert(hook_opaque == NULL);
(void) opt;
- (void) lang_opaque;
(void) hook_opaque;
- n = 0;
-
- for (i = 0; i < count; i++) {
- n |= 1U << ids[i];
- }
-
- fprintf(f, "return %#x;", (unsigned) n);
-
- fprintf(f, " /* ");
+ fprintf(f, "/* ");
for (i = 0; i < count; i++) {
assert(ids[i] < matchc);
@@ -375,30 +364,19 @@ accept_c(FILE *f, const struct fsm_options *opt,
}
static int
-accept_rust(FILE *f, const struct fsm_options *opt,
+comment_rust(FILE *f, const struct fsm_options *opt,
const fsm_end_id_t *ids, size_t count,
- void *lang_opaque, void *hook_opaque)
+ void *hook_opaque)
{
- unsigned n;
size_t i;
assert(opt != NULL);
- assert(lang_opaque == NULL);
assert(hook_opaque == NULL);
(void) opt;
- (void) lang_opaque;
(void) hook_opaque;
- n = 0;
-
- for (i = 0; i < count; i++) {
- n |= 1U << ids[i];
- }
-
- fprintf(f, "return Some(%#x)", (unsigned) n);
-
- fprintf(f, " /* ");
+ fprintf(f, "// ");
for (i = 0; i < count; i++) {
assert(ids[i] < matchc);
@@ -410,37 +388,23 @@ accept_rust(FILE *f, const struct fsm_options *opt,
}
}
- fprintf(f, " */");
-
return 0;
}
static int
-accept_llvm(FILE *f, const struct fsm_options *opt,
+comment_llvm(FILE *f, const struct fsm_options *opt,
const fsm_end_id_t *ids, size_t count,
- void *lang_opaque, void *hook_opaque)
+ void *hook_opaque)
{
- unsigned n;
size_t i;
assert(opt != NULL);
- assert(lang_opaque != NULL);
assert(hook_opaque == NULL);
(void) opt;
(void) hook_opaque;
- n = 0;
-
- for (i = 0; i < count; i++) {
- n |= 1U << ids[i];
- }
-
- i = * (const size_t *) lang_opaque;
-
- fprintf(f, "[u%#x, %%ret%zu],", (unsigned) n, i);
-
- fprintf(f, " ; ");
+ fprintf(f, "; ");
for (i = 0; i < count; i++) {
assert(ids[i] < matchc);
@@ -452,98 +416,35 @@ accept_llvm(FILE *f, const struct fsm_options *opt,
}
}
- fprintf(f, "\n");
-
return 0;
}
static int
-accept_dot(FILE *f, const struct fsm_options *opt,
+comment_dot(FILE *f, const struct fsm_options *opt,
const fsm_end_id_t *ids, size_t count,
- void *lang_opaque, void *hook_opaque)
-{
- fsm_state_t s;
- size_t i;
-
- assert(opt != NULL);
- assert(lang_opaque != NULL);
- assert(hook_opaque == NULL);
-
- (void) opt;
- (void) hook_opaque;
-
- s = * (fsm_state_t *) lang_opaque;
-
- fprintf(f, "label = <");
-
- if (!opt->anonymous_states) {
- fprintf(f, "%u", s);
-
- if (count > 0) {
- fprintf(f, "
");
- }
- }
-
- for (i = 0; i < count; i++) {
- assert(ids[i] < matchc);
-
- fprintf(f, "#%u", ids[i]);
-
- if (i + 1 < count) {
- fprintf(f, ",");
- }
- }
-
- fprintf(f, ">");
-
- /* TODO: centralise to libfsm/print/dot.c */
- if (opt->comments) {
- fprintf(f, " /* ");
-
- for (i = 0; i < count; i++) {
- assert(ids[i] < matchc);
-
- fprintf(f, "\"%s\"", matchv[ids[i]]); /* XXX: escape string (and comment) */
-
- if (i + 1 < count) {
- fprintf(f, ", ");
- }
- }
-
- fprintf(f, " */");
- }
-
- return 0;
-}
-
-static int
-accept_json(FILE *f, const struct fsm_options *opt,
- const fsm_end_id_t *ids, size_t count,
- void *lang_opaque, void *hook_opaque)
+ void *hook_opaque)
{
size_t i;
assert(opt != NULL);
- assert(lang_opaque == NULL);
assert(hook_opaque == NULL);
(void) opt;
- (void) lang_opaque;
(void) hook_opaque;
- fprintf(f, "[ ");
+ fprintf(f, "/* ");
for (i = 0; i < count; i++) {
assert(ids[i] < matchc);
- fprintf(f, "%u", ids[i]);
+ fprintf(f, "\"%s\"", matchv[ids[i]]); /* XXX: escape string (and comment) */
if (i + 1 < count) {
fprintf(f, ", ");
}
}
- fprintf(f, " ]");
+ fprintf(f, " */");
return 0;
}
@@ -1146,33 +1047,30 @@ main(int argc, char *argv[])
}
if (fsm_lang != FSM_PRINT_NONE) {
- /* TODO: print examples in comments for end states;
- * patterns in comments for the whole FSM */
-
switch (fsm_lang) {
case FSM_PRINT_NONE:
break;
case FSM_PRINT_C:
case FSM_PRINT_VMC:
- hooks.accept = accept_c;
+ hooks.comment = comment_c;
break;
case FSM_PRINT_RUST:
- hooks.accept = accept_rust;
+ case FSM_PRINT_GO: /* close enough */
+ hooks.comment = comment_rust;
break;
case FSM_PRINT_LLVM:
- hooks.accept = accept_llvm;
+ hooks.comment = comment_llvm;
break;
case FSM_PRINT_DOT:
case FSM_PRINT_VMDOT:
- hooks.accept = patterns ? accept_dot : NULL;
+ hooks.comment = patterns ? comment_dot : NULL;
break;
case FSM_PRINT_JSON:
- hooks.accept = patterns ? accept_json : NULL;
break;
default:
diff --git a/src/retest/runner.c b/src/retest/runner.c
index ea5a54372..0dac9ab38 100644
--- a/src/retest/runner.c
+++ b/src/retest/runner.c
@@ -379,7 +379,7 @@ runner_init_compiled(struct fsm *fsm,
case IMPL_LLVM:
r->u.impl_llvm.h = h;
- r->u.impl_llvm.func = (bool (*)(const char *, const char *)) (uintptr_t) dlsym(h, "fsm_main");
+ r->u.impl_llvm.func = (bool (*)(const char *, const char *)) (uintptr_t) dlsym(h, "fsm.main");
break;
case IMPL_GO:
|