Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fsm_detect_required_characters #492

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ SUBDIR += tests/intersect
SUBDIR += tests/eclosure
SUBDIR += tests/equals
SUBDIR += tests/subtract
SUBDIR += tests/detect_required
SUBDIR += tests/determinise
SUBDIR += tests/endids
SUBDIR += tests/epsilons
Expand Down
18 changes: 18 additions & 0 deletions include/adt/bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#define ADT_BITMAP_H

#include <stdint.h>
#include <limits.h>

#include <stdio.h>
#include "print/esc.h"

struct fsm_state;
Expand All @@ -23,6 +26,9 @@ bm_get(const struct bm *bm, size_t i);
void
bm_set(struct bm *bm, size_t i);

/* Clear bit i in the bitmap (the counterpart to bm_set).
 * bm must be non-NULL and i must be <= UCHAR_MAX. */
void
bm_unset(struct bm *bm, size_t i);

/* Get a writeable pointer to the Nth word of the char set bitmap,
* or NULL if out of bounds. */
uint64_t *
Expand Down Expand Up @@ -51,5 +57,17 @@ bm_snprint(const struct bm *bm, const struct fsm_options *opt,
int boxed,
escputc *escputc);

/* Overwrite dst with a copy of src. */
void
bm_copy(struct bm *dst, const struct bm *src);

/* Intersect in place: every word of dst is ANDed with the
 * corresponding word of src. src is not modified. */
void
bm_intersect(struct bm *dst, const struct bm *src);

/* Union in place: every word of dst is ORed with the
 * corresponding word of src. src is not modified. */
void
bm_union(struct bm *dst, const struct bm *src);

/* Return 1 if any word of the bitmap is nonzero, otherwise 0. */
int
bm_any(const struct bm *bm);

#endif

31 changes: 31 additions & 0 deletions include/fsm/walk.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#ifndef FSM_WALK_H
#define FSM_WALK_H

#include <adt/bitmap.h>

struct fsm;
struct fsm_state;

Expand Down Expand Up @@ -128,5 +130,34 @@ fsm_generate_matches_cb fsm_generate_cb_printf;
* to escape all characters or just nonprintable ones. */
fsm_generate_matches_cb fsm_generate_cb_printf_escaped;

/* Walk a DFA and detect which characters MUST appear in the input for a
* match to be possible. For example, if input for the DFA corresponding
* to /^(abc|dbe)$/ does not contain 'b' at all, there's no way it can
* ever match, so executing the regex is unnecessary. This does not detect
* which characters must appear before/after others or how many times, just
* which must be present.
*
* The input must be a DFA. When run with EXPENSIVE_CHECKS this will
* check and return ERROR_MISUSE if it is not, otherwise this is an
* unchecked error.
*
* The character map will be cleared before populating. If count is
* non-NULL, *count will be updated with how many required characters
* were found.
*
* There is an optional step_limit -- if this is reached, then it will
* return FSM_DETECT_REQUIRED_CHARACTERS_STEP_LIMIT_REACHED and a
* cleared bitmap, because any partial information could still have been
* contradicted later. If the step_limit is 0 it will be ignored. */
/* Result codes for fsm_detect_required_characters().
 * Non-negative values are normal outcomes, negative values are errors. */
enum fsm_detect_required_characters_res {
/* The analysis completed and charmap was populated. */
FSM_DETECT_REQUIRED_CHARACTERS_WRITTEN,
/* The step limit was hit before the walk finished; the bitmap
 * is returned cleared. */
FSM_DETECT_REQUIRED_CHARACTERS_STEP_LIMIT_REACHED,
/* The input was not a DFA (only detected with EXPENSIVE_CHECKS). */
FSM_DETECT_REQUIRED_CHARACTERS_ERROR_MISUSE = -1,
/* NOTE(review): presumably an allocation failure during the walk --
 * confirm against the implementation. */
FSM_DETECT_REQUIRED_CHARACTERS_ERROR_ALLOC = -2,
};
/* charmap is a 256-bit set stored as four 64-bit words, one bit per
 * possible byte value. */
enum fsm_detect_required_characters_res
fsm_detect_required_characters(const struct fsm *dfa, size_t step_limit,
uint64_t charmap[4], size_t *count);

#endif

16 changes: 16 additions & 0 deletions man/fsm.1/fsm.1.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
<!ENTITY G.opt "<option>-G</option>&nbsp;&length.arg;">
<!ENTITY k.opt "<option>-k</option>&nbsp;&io.arg;">
<!ENTITY i.opt "<option>-i</option>&nbsp;&iterations.arg;">
<!ENTITY S.opt "<option>-S</option>&nbsp;&limit.arg;">
<!ENTITY U.opt "<option>-U</option>&nbsp;&charset.arg;">
<!ENTITY X.opt "<option>-X</option>">

Expand Down Expand Up @@ -325,6 +326,14 @@
</listitem>
</varlistentry>

<varlistentry>
<term>&S.opt;</term>

<listitem>
<para>Set a step limit for long-running operations.</para>
</listitem>
</varlistentry>

<varlistentry>
<term>&t.opt;</term>

Expand Down Expand Up @@ -487,6 +496,13 @@
of each state in the &fsm;.
Printed to &stdout.lit;; exit status is always true.</td>
</tr>
<tr>
<td><code>requiredchars</code></td>
<td rowspan="1" role="na">&ndash;</td>
<td rowspan="1">Determine characters that must appear in any
inputs that could match the &fsm;. Exit status is true
unless the step limit was reached.</td>
</tr>
</tbody>
</table>
</listitem>
Expand Down
41 changes: 41 additions & 0 deletions src/adt/bitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <limits.h>
#include <ctype.h>
#include <stdint.h>
#include <string.h>

#include <adt/bitmap.h>
#include <adt/u64bitset.h>
Expand All @@ -34,6 +35,15 @@ bm_set(struct bm *bm, size_t i)
u64bitset_set(bm->map, i);
}

/*
 * Clear bit i in the bitmap.
 *
 * bm must not be NULL and i must fit in an unsigned char; both
 * preconditions are enforced with assert.
 */
void
bm_unset(struct bm *bm, size_t i)
{
	assert(bm != NULL);
	assert(i <= UCHAR_MAX);

	u64bitset_clear(bm->map, i);
}

uint64_t *
bm_nth_word(struct bm *bm, size_t n)
{
Expand Down Expand Up @@ -325,3 +335,34 @@ bm_snprint(const struct bm *bm, const struct fsm_options *opt,

return -1;
}

void
bm_copy(struct bm *dst, const struct bm *src)
{
memcpy(dst, src, sizeof(*src));
}

/*
 * Intersect dst with src in place: after the call, a bit is set in
 * dst only if it was set in both dst and src. src is not modified.
 *
 * Both pointers must be non-NULL.
 */
void
bm_intersect(struct bm *dst, const struct bm *src)
{
	assert(dst != NULL);
	assert(src != NULL);

	const size_t words = sizeof src->map / sizeof src->map[0];

	for (size_t w = 0; w < words; w++) {
		dst->map[w] &= src->map[w];
	}
}

/*
 * Union src into dst in place: after the call, a bit is set in dst
 * if it was set in either dst or src. src is not modified.
 *
 * Both pointers must be non-NULL.
 */
void
bm_union(struct bm *dst, const struct bm *src)
{
	assert(dst != NULL);
	assert(src != NULL);

	const size_t words = sizeof src->map / sizeof src->map[0];

	for (size_t w = 0; w < words; w++) {
		dst->map[w] |= src->map[w];
	}
}

/*
 * Report whether the bitmap has at least one bit set.
 *
 * Returns 1 if any word of the map is nonzero, 0 if the bitmap is
 * empty. bm must be non-NULL.
 */
int
bm_any(const struct bm *bm)
{
	assert(bm != NULL);

	const size_t words = sizeof bm->map / sizeof bm->map[0];

	for (size_t w = 0; w < words; w++) {
		if (bm->map[w] != 0) {
			return 1;
		}
	}

	return 0;
}
50 changes: 48 additions & 2 deletions src/fsm/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <fsm/parser.h>

#include <adt/stateset.h> /* XXX */
#include <adt/u64bitset.h>

#include "libfsm/internal.h" /* XXX */

Expand Down Expand Up @@ -101,6 +102,16 @@ query_epsilonclosure(const struct fsm *fsm, fsm_state_t state)
abort();
}

/*
 * Placeholder callback for the "requiredchars" query.
 *
 * Its address is only used as a marker that main() compares against
 * to select the required-characters code path; the function itself
 * is not meant to run and aborts if it ever does.
 */
static int
query_required_chars(const struct fsm *fsm, fsm_state_t state)
{
	/* never called */
	(void) state;
	(void) fsm;

	abort();
}

static void
usage(void)
{
Expand Down Expand Up @@ -227,7 +238,9 @@ static int
{ "hasambiguity", fsm_has, fsm_hasnondeterminism },
{ "hasnondeterminism", fsm_has, fsm_hasnondeterminism },
{ "hasepsilons", fsm_has, fsm_hasepsilons },
{ "epsilons", fsm_has, fsm_hasepsilons }
{ "epsilons", fsm_has, fsm_hasepsilons },
{ "requiredchars", NULL, query_required_chars },
{ "chars", NULL, query_required_chars },
};

assert(name != NULL);
Expand Down Expand Up @@ -378,6 +391,7 @@ main(int argc, char *argv[])
int xfiles;
int r;
size_t generate_bounds = 0;
size_t step_limit = 0;

int (*query)(const struct fsm *, fsm_state_t);
int (*walk )(const struct fsm *,
Expand All @@ -404,7 +418,7 @@ main(int argc, char *argv[])
{
int c;

while (c = getopt(argc, argv, "h" "aCcgwXe:k:i:" "xpq:l:dG:mrt:EU:W:"), c != -1) {
while (c = getopt(argc, argv, "h" "aCcgwXe:k:i:" "xpq:l:dG:mrt:ES:U:W:"), c != -1) {
switch (c) {
case 'a': opt.anonymous_states = 1; break;
case 'c': opt.consolidate_edges = 1; break;
Expand Down Expand Up @@ -451,6 +465,10 @@ main(int argc, char *argv[])
}
break;

case 'S':
step_limit = strtoul(optarg, NULL, 10);
break; /* can be 0 */

case 'h':
usage();
exit(EXIT_SUCCESS);
Expand Down Expand Up @@ -669,6 +687,34 @@ main(int argc, char *argv[])
closure_free(fsm, closures, fsm->statecount);

return 0;
} else if (query == query_required_chars) {
assert(walk == NULL);
uint64_t charmap[4];
size_t count;
enum fsm_detect_required_characters_res res;
res = fsm_detect_required_characters(fsm, step_limit, charmap, &count);
if (res == FSM_DETECT_REQUIRED_CHARACTERS_STEP_LIMIT_REACHED) {
fprintf(stderr, "fsm_detect_required_characters: step limit reached (%zd)\n", step_limit);
exit(EXIT_FAILURE);
} else {
assert(res == FSM_DETECT_REQUIRED_CHARACTERS_WRITTEN);
char buf[257] = {0};
size_t used = 0;
for (size_t i = 0; i < 256; i++) {
if (u64bitset_get(charmap, i)) {
buf[used++] = (char)i;
}
}
printf("%zd ", count);
for (size_t i = 0; i < used; i++) {
c_escputc_str(stdout, &opt, buf[i]);
}
printf("\n");

fsm_free(fsm);
fsm_to_cleanup = NULL;
return EXIT_SUCCESS;
}
} else {
assert(walk != NULL);
r |= !walk(fsm, query);
Expand Down
1 change: 1 addition & 0 deletions src/libfsm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ SRC += src/libfsm/complete.c
SRC += src/libfsm/consolidate.c
SRC += src/libfsm/clone.c
SRC += src/libfsm/closure.c
SRC += src/libfsm/detect_required.c
SRC += src/libfsm/edge.c
SRC += src/libfsm/empty.c
SRC += src/libfsm/end.c
Expand Down
Loading