Skip to content

Commit

Permalink
Merge pull request #499 from katef/sv/randomized-generation-and-misc-…
Browse files Browse the repository at this point in the history
…fixes

Randomized generation and misc. fixes
  • Loading branch information
katef authored Oct 12, 2024
2 parents bfeb7ce + cf8fc65 commit c375229
Show file tree
Hide file tree
Showing 10 changed files with 79 additions and 16 deletions.
2 changes: 1 addition & 1 deletion include/adt/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
/* If non-zero, expand the timer macros defined below, otherwise
* they compile away. */
#ifndef TRACK_TIMES
#define TRACK_TIMES 0
#define TRACK_TIMES (0 && !BUILD_FOR_FUZZER)
#endif

#if EXPENSIVE_CHECKS && TRACK_TIMES
Expand Down
7 changes: 6 additions & 1 deletion include/fsm/walk.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ fsm_walk_edges(const struct fsm *fsm, void *opaque,
* functionally equivalent cases makes testing dramatically faster,
* but exploring every edge could be added later.
*
* If randomized is zero then it will generate the first label in the
* label set, otherwise a label from the set will be chosen using rand().
* Printable characters are always chosen when the set contains any,
* then other non-zero bytes, and 0x00 only as a last resort. The caller
* can use srand() beforehand to set a PRNG seed.
*
* Note: fsm is non-const because it calls fsm_trim on the FSM
* internally. This records the shortest distance from each state to an
* end state, which is used to prune branches that would not produce
Expand All @@ -114,7 +119,7 @@ fsm_generate_matches_cb(const struct fsm *fsm,
const char *input, size_t input_length,
fsm_state_t end_state, void *opaque);
int
fsm_generate_matches(struct fsm *fsm, size_t max_length,
fsm_generate_matches(struct fsm *fsm, size_t max_length, int randomized,
fsm_generate_matches_cb *cb, void *opaque);

/* Callback provided for the most basic use case for
Expand Down
2 changes: 1 addition & 1 deletion src/fsm/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,7 @@ main(int argc, char *argv[])
}

if (generate_bounds > 0) {
r = fsm_generate_matches(fsm, generate_bounds, fsm_generate_cb_printf_escaped, &opt);
r = fsm_generate_matches(fsm, generate_bounds, 0, fsm_generate_cb_printf_escaped, &opt);
}

fsm_free(fsm);
Expand Down
2 changes: 0 additions & 2 deletions src/libfsm/determinise.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,6 @@ fsm_determinise_with_config(struct fsm *nfa,
}

ac_env.output_count = 0;

/* All elements in sclosures[] are interned, so they will be freed later. */
} while ((curr = stack_pop(stack)));

{
Expand Down
71 changes: 63 additions & 8 deletions src/libfsm/gen.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ struct gen_ctx {
fsm_generate_matches_cb *cb;

bool done;
bool randomized;

size_t buf_ceil;
size_t buf_used;
Expand Down Expand Up @@ -106,7 +107,7 @@ struct gen_ctx {
static bool
gen_init_outer(struct fsm *fsm, size_t max_length,
fsm_generate_matches_cb *cb, void *opaque,
bool randomized, unsigned seed);
bool randomized);

static bool
gen_init(struct gen_ctx *ctx, struct fsm *fsm);
Expand Down Expand Up @@ -139,17 +140,21 @@ static bool
grow_stack(struct gen_ctx *ctx);

int
fsm_generate_matches(struct fsm *fsm, size_t max_length,
fsm_generate_matches(struct fsm *fsm, size_t max_length, int randomized,
fsm_generate_matches_cb *cb, void *opaque)
{
if (max_length == 0) {
errno = EINVAL;
return 0;
}

if (!fsm_has(fsm, fsm_isend)) {
return 1; /* no end state -> nothing to do */
}

INIT_TIMERS();
TIME(&pre);
int res = gen_init_outer(fsm, max_length, cb, opaque, false, 0);
int res = gen_init_outer(fsm, max_length, cb, opaque, randomized != 0);
TIME(&post);

DIFF_MSEC("fsm_generate_matches", pre, post, NULL);
Expand Down Expand Up @@ -199,7 +204,7 @@ fsm_generate_cb_printf(const struct fsm *fsm,
static bool
gen_init_outer(struct fsm *fsm, size_t max_length,
fsm_generate_matches_cb *cb, void *opaque,
bool randomized, unsigned seed)
bool randomized)
{
int res = false;
if (fsm == NULL || cb == NULL || max_length == 0) {
Expand All @@ -208,9 +213,6 @@ gen_init_outer(struct fsm *fsm, size_t max_length,

assert(fsm_all(fsm, fsm_isdfa)); /* DFA-only */

assert(!randomized); /* not yet supported */
(void)seed;

#if LOG_GEN > 1
fprintf(stderr, "%s: %u states\n", __func__, fsm_countstates(fsm));
#endif
Expand All @@ -224,6 +226,7 @@ gen_init_outer(struct fsm *fsm, size_t max_length,
.max_length = max_length,
.cb = cb,
.opaque = opaque,
.randomized = randomized,
};

if (!gen_init(&ctx, fsm)) {
Expand Down Expand Up @@ -524,6 +527,55 @@ first_symbol(const uint64_t *symbols)
return 0;
}

/* Choose one byte from a 256-entry symbol bitmap using rand().
 *
 * symbols points at four 64-bit words; bit (N % 64) of word (N / 64)
 * being set means byte value N is a member of the set.
 *
 * Members are sorted into three classes, tried in this order:
 *   1. printable bytes (per isprint), excluding 0x00
 *   2. all other non-zero bytes
 *   3. 0x00 on its own
 * The first non-empty class wins, and for classes 1 and 2 a single
 * rand() call picks the member. The caller controls the sequence
 * via srand().
 *
 * An empty set trips an assertion; if assertions are compiled out
 * the function returns 0. */
static unsigned char
random_symbol(const uint64_t *symbols)
{
	/* Candidates per class, each kept in ascending byte order. */
	unsigned char printable[256];
	unsigned char other[256];
	size_t printable_count = 0;
	size_t other_count = 0;
	bool saw_nul = false;

	for (unsigned word_i = 0; word_i < 4; word_i++) {
		const uint64_t word = symbols[word_i];
		if (word == 0) {
			continue;	/* no members in this run of 64 bytes */
		}

		for (unsigned bit = 0; bit < 64; bit++) {
			if ((word & (1ULL << bit)) == 0) {
				continue;
			}

			const unsigned byte = 64 * word_i + bit;
			if (byte == 0) {
				saw_nul = true;
			} else if (isprint(byte)) {
				printable[printable_count++] = (unsigned char)byte;
			} else {
				other[other_count++] = (unsigned char)byte;
			}
		}
	}

	/* Select the highest-priority class that has any members. */
	const unsigned char *pool = printable;
	size_t pool_size = printable_count;
	if (pool_size == 0) {
		pool = other;
		pool_size = other_count;
	}

	if (pool_size > 0) {
		return pool[rand() % pool_size];
	}

	/* 0x00 is the last resort, since it will truncate the string. */
	if (!saw_nul) {
		assert(!"empty set");
	}
	return 0;
}

#if DUMP_EDGES
static void
dump_edges(fsm_state_t state, struct edge_set *edges)
Expand All @@ -538,6 +590,7 @@ dump_edges(fsm_state_t state, struct edge_set *edges)
size_t i = 0;
while (edge_set_group_iter_next(&ei, &eg)) {
const unsigned char symbol = first_symbol(eg.symbols);
const unsigned char symbol = random_symbol(eg.symbols);
fprintf(stderr, "%s: %d -- %zu/%zu -- 0x%02x (%c) -> %d\n",
__func__, state, i, count,
symbol, isprint(symbol) ? symbol : '.', eg.to);
Expand Down Expand Up @@ -585,7 +638,9 @@ sfs_step_edges(struct gen_ctx *ctx, struct gen_stack_frame *sf)
struct edge_group_iter_info eg;

if (iter_next_transition(ctx, sf, &eg)) {
const unsigned char symbol = first_symbol(eg.symbols);
const unsigned char symbol = ctx->randomized
? random_symbol(eg.symbols)
: first_symbol(eg.symbols);
const fsm_state_t state = eg.to;

LOG(2, "sfs_step_edges: got edge 0x%x ('%c')\n",
Expand Down
4 changes: 4 additions & 0 deletions src/libfsm/trim.c
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,10 @@ integrity_check(const char *descr, const struct fsm *fsm)
return;
#endif

#if !EXPENSIVE_CHECKS
return;
#endif

if (LOG_TRIM > 1) {
fprintf(stderr, "integrity check: %s...\n", descr);
}
Expand Down
2 changes: 1 addition & 1 deletion src/re/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1047,7 +1047,7 @@ main(int argc, char *argv[])
}

if (generate_bounds > 0) {
if (!fsm_generate_matches(fsm, generate_bounds, fsm_generate_cb_printf_escaped, &opt)) {
if (!fsm_generate_matches(fsm, generate_bounds, 0, fsm_generate_cb_printf_escaped, &opt)) {
exit(EXIT_FAILURE);
}

Expand Down
1 change: 1 addition & 0 deletions tests/gen/gen1.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ int main(void) {
assert(fsm != NULL);

if (!fsm_generate_matches(fsm, MAX_EXP_MATCH + 1 /* for \0 */,
0,
gtest_matches_cb, &matches)) {
fprintf(stderr, "fsm_generate_matches: error\n");
exit(EXIT_FAILURE);
Expand Down
2 changes: 1 addition & 1 deletion tests/gen/gen2.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ int main(void) {
struct fsm *fsm = gtest_fsm_of_matches(&matches);
assert(fsm != NULL);

if (!fsm_generate_matches(fsm, MAX_EXP_MATCH + 1, gtest_matches_cb, &matches)) {
if (!fsm_generate_matches(fsm, MAX_EXP_MATCH + 1, 0, gtest_matches_cb, &matches)) {
fprintf(stderr, "fsm_generate_matches: error\n");
exit(EXIT_FAILURE);
}
Expand Down
2 changes: 1 addition & 1 deletion tests/gen/gen3.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ int main(void) {
struct fsm *fsm = build();
assert(fsm != NULL);

if (!fsm_generate_matches(fsm, 11, matches_cb, NULL)) {
if (!fsm_generate_matches(fsm, 11, 0, matches_cb, NULL)) {
fprintf(stderr, "fsm_generate_matches: error\n");
exit(EXIT_FAILURE);
}
Expand Down

0 comments on commit c375229

Please sign in to comment.