Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simple lx -l json printer #118

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/lx/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ print_name(const char *name)
{ "test", NULL },
{ "dot", lx_print_dot },
{ "dump", lx_print_dump },
{ "json", lx_print_json },
{ "zdot", lx_print_zdot },
{ "c", lx_print_c },
{ "h", lx_print_h }
Expand Down
1 change: 1 addition & 0 deletions src/lx/print.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ lx_print lx_print_c;
lx_print lx_print_h;
lx_print lx_print_dot;
lx_print lx_print_dump;
lx_print lx_print_json;
lx_print lx_print_zdot;

#endif
Expand Down
1 change: 1 addition & 0 deletions src/lx/print/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ SRC += src/lx/print/c.c
SRC += src/lx/print/h.c
SRC += src/lx/print/dot.c
SRC += src/lx/print/dump.c
SRC += src/lx/print/json.c
SRC += src/lx/print/zdot.c

.for src in ${SRC:Msrc/lx/print/*.c}
Expand Down
163 changes: 163 additions & 0 deletions src/lx/print/json.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/*
* Copyright 2019 Jamey Sharp
*
* See LICENCE for the full copyright terms.
*/

#include <assert.h>
#include <stdio.h>
#include <errno.h>

#include <adt/set.h>

#include <fsm/fsm.h>
#include <fsm/pred.h>
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And <fsm/walk.h> for fsm_all()


#include "libfsm/internal.h" /* XXX */

#include "lx/ast.h"
#include "lx/print.h"

/* TODO: centralise with libfsm */
static unsigned int
indexof(const struct fsm *fsm, const struct fsm_state *state)
{
struct fsm_state *s;
unsigned int i;

assert(fsm != NULL);
assert(state != NULL);

for (s = fsm->sl, i = 0; s != NULL; s = s->next, i++) {
if (s == state) {
return i;
}
}

assert(!"unreached");
return 0;
}

static unsigned int
zindexof(const struct ast *ast, const struct ast_zone *zone)
{
struct ast_zone *z;
unsigned int i;

assert(ast != NULL);
assert(zone != NULL);

for (z = ast->zl, i = 0; z != NULL; z = z->next, i++) {
if (z == zone) {
return i;
}
}

assert(!"unreached");
return 0;
}

/*
 * Write a single zone `z` of `ast` to `f` as one JSON object.
 *
 * The object carries:
 *   - "initial_state": index (per indexof) of z->fsm->start
 *   - "states": one object per state on z->fsm->sl, in list order
 *
 * Each state object may contain:
 *   - "token":       "$" followed by the mapping token name, end states only
 *   - "next_zone":   index (per zindexof) of the mapping target zone, end
 *                    states only
 *   - "accepts":     true, end states only
 *   - "transitions": array of { "symbol", "target" } pairs, emitted only
 *                    when the state has at least one edge
 *
 * A trailing comma is written after the zone object when z->next is set,
 * so the caller can emit zones back to back inside an array.
 *
 * Asserts that every state of z->fsm satisfies fsm_isdfa.
 * As written, every path returns 0; the fprintf results are not checked.
 */
static int
print_zone(FILE *f, const struct ast *ast, const struct ast_zone *z)
{
struct fsm_state *s, *st;
int ret;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unused variable


assert(f != NULL);
assert(z != NULL);
assert(z->fsm != NULL);
assert(fsm_all(z->fsm, fsm_isdfa));
assert(ast != NULL);

fprintf(f, " {\n");
fprintf(f, " \"initial_state\": %u,\n", indexof(z->fsm, z->fsm->start));
fprintf(f, " \"states\": [\n");

for (s = z->fsm->sl; s != NULL; s = s->next) {
struct fsm_edge *e;
struct set_iter it;

fprintf(f, " {\n");

/*
 * Fetch the first edge before printing anything else for this state:
 * whether an edge exists decides if "accepts" needs a trailing comma.
 */
e = set_first(s->edges, &it);

if (fsm_isend(z->fsm, s)) {
/* NOTE(review): assumes the opaque pointer of an end state is its ast_mapping -- confirm where opaque is set */
const struct ast_mapping *m = s->opaque;
assert(m != NULL);

if (m->token != NULL) {
/* the token name is written verbatim via %s, with no JSON escaping */
fprintf(f, " \"token\": \"$%s\",\n", m->token->s);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These strings should be escaped, per src/print/json.c's json_escputc()

}
if (m->to != NULL) {
fprintf(f, " \"next_zone\": %u,\n", zindexof(ast, m->to));
}

fprintf(f, " \"accepts\": true%s\n", e != NULL ? "," : "");
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Terminology: libfsm probably should have its .isend field renamed to "accept" at some point. I'd like the naming to be consistent.

Likewise for your "initial_zone" field, where that's called either the starting zone (when talking about the graph), or the global zone (when talking about scope).

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And the same for "initial_state"

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And for "target", which libfsm currently calls "dst" or "to".

}

if (e != NULL) {
int first = 1;
fprintf(f, " \"transitions\": [");
for (; e != NULL; e = set_next(&it)) {
struct set_iter jt;
for (st = set_first(e->sl, &jt); st != NULL; st = set_next(&jt)) {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

libfsm has an IR for code generation which provides a few optimisations for generated DFA. I'm not sure if you'd want to use it here. It provides different "strategies" for each state, where e.g. symbols are presented grouped as x..y ranges, or e.g. if all symbols transition to the same state. If you're going on to generate code from the json, I think it probably would make sense to make use of that.

To use that IR, I'd first add json output to libfsm (cut & pasted from the current C), which just walks through that IR and outputs what it's given. The IR was introduced here: #94

Then re -pl json xyz would give json for a single regexp, too.

fprintf(f, "%s\n { \"symbol\": %u, \"target\": %u }",
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure about these numeric symbols. Do you think a single-letter string would be more fitting?

/* every entry after the first is preceded by a comma */
first ? "" : ",",
e->symbol,
indexof(z->fsm, st));
first = 0;
}
}
fprintf(f, "\n ]\n");
}

/* comma between state objects, none after the last one */
fprintf(f, " }%s\n", s->next ? "," : "");
}

fprintf(f, " ]\n");
/* comma between zone objects, none after the last one */
fprintf(f, " }%s\n", z->next ? "," : "");

return 0;
}

/*
 * Write the whole lexer description `ast` to `f` as a JSON document.
 *
 * Every zone on ast->zl is checked with fsm_all(..., fsm_isdfa) first.
 * If any zone fails that check, errno is set to EINVAL and the function
 * returns before writing anything.
 *
 * The document is one object with:
 *   - "prefix":       the api, tok and lx strings taken from `prefix`
 *   - "initial_zone": index (per zindexof) of ast->global
 *   - "zones":        one entry per zone, each written by print_zone
 *
 * When print_progress is set, " z<N>" is written to stderr for each zone
 * number N for which important(N) is true; `zn` is only initialised and
 * read on that path.
 *
 * If print_zone reports -1 the function returns at once, leaving the
 * "zones" array and the outer object unclosed (see the XXX below).
 */
void
lx_print_json(FILE *f, const struct ast *ast)
{
const struct ast_zone *z;
unsigned int zn;

assert(f != NULL);

/* refuse to print anything unless every zone is a DFA */
for (z = ast->zl; z != NULL; z = z->next) {
if (!fsm_all(z->fsm, fsm_isdfa)) {
errno = EINVAL;
return;
}
}

fprintf(f, "{\n");
/* NOTE(review): the prefix strings are written verbatim via %s, with no JSON escaping */
fprintf(f, " \"prefix\": { \"api\": \"%s\", \"tok\": \"%s\", \"lx\": \"%s\" },\n",
prefix.api, prefix.tok, prefix.lx);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feels a bit strange to output the prefixes here - these are to affect the generated code, not to be part of it. But I know you're intending to use this json to go and generate code, so it does make sense to include them here.

fprintf(f, " \"initial_zone\": %u,\n", zindexof(ast, ast->global));
fprintf(f, " \"zones\": [\n");

if (print_progress) {
zn = 0;
}

for (z = ast->zl; z != NULL; z = z->next) {
if (print_progress) {
if (important(zn)) {
fprintf(stderr, " z%u", zn);
}
zn++;
}

if (-1 == print_zone(f, ast, z)) {
return; /* XXX: handle error */
}
}

fprintf(f, " ]\n");
fprintf(f, "}\n");
}