Skip to content

Commit

Permalink
Add tests/re_strings. Switch to a set of endids.
Browse files Browse the repository at this point in the history
This uses a `struct state_set` since sizeof(fsm_state_t) ==
sizeof(fsm_end_id_t), and it's probably not worth making a separate ADT
just for these.

The second test checks that duplicated strings get all their endids set.
The previous implementation (a single endid, or ENDID_NONE) dropped all
but the last endid defined.
  • Loading branch information
silentbicycle committed Apr 29, 2024
1 parent 3d4beb1 commit 986144b
Show file tree
Hide file tree
Showing 7 changed files with 161 additions and 6 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ SUBDIR += tests/pcre-flags
SUBDIR += tests/pcre-repeat
SUBDIR += tests/pred
SUBDIR += tests/re_literal
SUBDIR += tests/re_strings
SUBDIR += tests/reverse
SUBDIR += tests/trim
SUBDIR += tests/union
Expand Down
22 changes: 16 additions & 6 deletions src/libre/ac.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
#include <ctype.h>

#include <fsm/fsm.h>
#include <adt/stateset.h>

#include "ac.h"

#define ENDID_NONE ((fsm_end_id_t)-1)
enum { POOL_BLOCK_SIZE = 256 };

struct trie_state {
Expand All @@ -26,7 +26,9 @@ struct trie_state {
unsigned int index;
unsigned int output:1;
unsigned int have_st:1;
fsm_end_id_t endid; /* or ENDID_NONE */

/* use a state set as an endid set */
struct state_set *endids;
};

struct trie_pool {
Expand Down Expand Up @@ -75,6 +77,7 @@ newstate(struct trie_graph *g)
st->index = ++g->nstates;

st->output = 0;
st->endids = NULL;

return st;
}
Expand Down Expand Up @@ -161,7 +164,9 @@ trie_add_word(struct trie_graph *g, const char *w, size_t n, const fsm_end_id_t
g->depth = n;
}

st->endid = (endid == NULL ? ENDID_NONE : *endid);
if (endid != NULL) {
state_set_add(&st->endids, NULL, (fsm_state_t)*endid);
}
return g;
}

Expand Down Expand Up @@ -281,7 +286,7 @@ trie_to_fsm_state(struct trie_state *ts, struct fsm *fsm,
assert(fsm != NULL);
assert(q != NULL);

if (ts->output && have_end && ts->endid == ENDID_NONE) {
if (ts->output && have_end && state_set_empty(ts->endids)) {
*q = single_end;
return 1;
}
Expand Down Expand Up @@ -318,8 +323,13 @@ trie_to_fsm_state(struct trie_state *ts, struct fsm *fsm,

if (ts->output) {
fsm_setend(fsm, st, 1);
if (ts->endid != ENDID_NONE) {
if (!fsm_setendidstate(fsm, st, ts->endid)) {

struct state_iter si;
fsm_state_t state;
state_set_reset(ts->endids, &si);
while (state_set_next(&si, &state)) {
fsm_end_id_t endid = (fsm_end_id_t)state;
if (!fsm_setendidstate(fsm, st, endid)) {
return 0;
}
}
Expand Down
26 changes: 26 additions & 0 deletions tests/re_strings/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Build and run rules for the tests/re_strings test programs.
# One test program is produced per tests/re_strings/re_strings<N>.c source,
# each linked together with the shared testutil.o helper.
.include "../../share/mk/top.mk"

# List of test sources (filled from the shell command's output), plus the
# source and output directories used by the rules below.
TEST.tests/re_strings != ls -1 tests/re_strings/re_strings*.c
TEST_SRCDIR.tests/re_strings = tests/re_strings
TEST_OUTDIR.tests/re_strings = ${BUILD}/tests/re_strings

# For each source, ${n} is what remains of the file name once the directory,
# the extension and the leading "re_strings" have been stripped.
.for n in ${TEST.tests/re_strings:T:R:C/^re_strings//}
# The "test" target depends on this test's result file.
test:: ${TEST_OUTDIR.tests/re_strings}/res${n}
SRC += ${TEST_SRCDIR.tests/re_strings}/re_strings${n}.c
# -UNDEBUG undefines NDEBUG for this source; the tests report failures via assert().
CFLAGS.${TEST_SRCDIR.tests/re_strings}/re_strings${n}.c = -UNDEBUG

# Link the test program from its own object, testutil.o and the two static libraries.
${TEST_OUTDIR.tests/re_strings}/run${n}: ${TEST_OUTDIR.tests/re_strings}/re_strings${n}.o ${TEST_OUTDIR.tests/re_strings}/testutil.o
${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/re_strings}/run${n} ${TEST_OUTDIR.tests/re_strings}/re_strings${n}.o ${TEST_OUTDIR.tests/re_strings}/testutil.o ${BUILD}/lib/libfsm.a ${BUILD}/lib/libre.a

# Rebuild the test object when the shared helper header changes.
${TEST_OUTDIR.tests/re_strings}/re_strings${n}.o: tests/re_strings/testutil.h

# Run the test program (its stdout is redirected to stderr) and record
# PASS or FAIL, according to its exit status, in the result file.
${TEST_OUTDIR.tests/re_strings}/res${n}: ${TEST_OUTDIR.tests/re_strings}/run${n}
( ${TEST_OUTDIR.tests/re_strings}/run${n} 1>&2 && echo PASS || echo FAIL ) > ${TEST_OUTDIR.tests/re_strings}/res${n}

# Relink the test program whenever libfsm or libre (if present in ${LIB}) is rebuilt.
.for lib in ${LIB:Mlibfsm} ${LIB:Mlibre}
${TEST_OUTDIR.tests/re_strings}/run${n}: ${BUILD}/lib/${lib:R}.a
.endfor
.endfor

# Shared helper object, compiled once and linked into every test program.
${TEST_OUTDIR.tests/re_strings}/testutil.o: tests/re_strings/testutil.c
${CC} ${CFLAGS} -c -o ${TEST_OUTDIR.tests/re_strings}/testutil.o tests/re_strings/testutil.c
21 changes: 21 additions & 0 deletions tests/re_strings/re_strings1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include "testutil.h"

/*
 * Test inputs: every two-letter string over the alphabet {a, b, c},
 * grouped by first letter. The list is NULL-terminated.
 */
const char *strings[] = {
	"aa", "ab", "ac",
	"ba", "bb", "bc",
	"ca", "cb", "cc",
	NULL
};

/*
 * Test entry point: runs the shared checks over this file's string list
 * and uses run_test()'s result as the exit status.
 */
int main(int argc, char **argv)
{
	/* The command line is not used by this test. */
	(void)argv;
	(void)argc;

	const int status = run_test(strings);
	return status;
}
17 changes: 17 additions & 0 deletions tests/re_strings/re_strings2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "testutil.h"

/*
 * Test inputs: the same literal appears three times in a row, between
 * two distinct strings. The list is NULL-terminated.
 */
const char *strings[] = {
	"first",
	"duplicate", "duplicate", "duplicate",
	"last",
	NULL
};

/*
 * Test entry point: runs the shared checks over this file's string list
 * and uses run_test()'s result as the exit status.
 */
int main(int argc, char **argv)
{
	/* The command line is not used by this test. */
	(void)argv;
	(void)argc;

	const int status = run_test(strings);
	return status;
}
69 changes: 69 additions & 0 deletions tests/re_strings/testutil.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#include "testutil.h"

#include <stdbool.h>
#include <assert.h>

#include "fsm/fsm.h"
#include "fsm/options.h"

#include "re/re.h"
#include "re/strings.h"

/* Capacity of id_buf, and the bound run_test() asserts on the number of inputs. */
enum { MAX_INPUTS = 100 };

/* Scratch buffer that receives the end ids reported for a matched end state. */
static fsm_end_id_t id_buf[MAX_INPUTS];

/* Options passed to re_strings_build(); static storage, so zero-initialised. */
static struct fsm_options opt;

/*
 * Build a matcher from a NULL-terminated list of literal strings and
 * check that every one of them matches and reports its own end id.
 *
 * Each string is registered with an end id equal to its position in the
 * list. After building the FSM, each string is executed against it; the
 * end ids attached to the reached end state must include that position.
 * Duplicate strings therefore must carry the ids of all their positions.
 *
 * strings: NULL-terminated array of NUL-terminated inputs; must hold
 *          fewer than MAX_INPUTS entries. The array is not modified.
 *
 * Returns EXIT_SUCCESS. Every failed check aborts through assert(), so
 * this must be compiled with assertions enabled (NDEBUG undefined).
 */
int
run_test(const char **strings)
{
	assert(strings != NULL);

	struct re_strings *s = re_strings_new();
	assert(s != NULL);

	/* Register every input, using its index in the list as its end id. */
	fsm_end_id_t id = 0;
	for (const char **input = strings; *input != NULL; input++) {
		if (!re_strings_add_str(s, *input, &id)) {
			assert(!"re_strings_add_str");
		}

		id++;
		assert(id < MAX_INPUTS);
	}

	const int flags = 0; /* not anchored */

	struct fsm *fsm = re_strings_build(s, &opt, flags);
	assert(fsm != NULL);

	/* Each literal string input should match, and the set of
	 * matching endids should include the expected one. */
	id = 0;
	for (const char **input = strings; *input != NULL; input++, id++) {
		fsm_state_t end;

		/* Give fsm_sgetc a private cursor instead of a pointer into
		 * the caller's array, so that consuming the input cannot
		 * rewrite the entries of strings[]. */
		const char *cursor = *input;
		const int res = fsm_exec(fsm, fsm_sgetc, &cursor, &end, NULL);
		assert(res > 0); /* match */

		size_t written;
		enum fsm_getendids_res eres = fsm_getendids(fsm, end,
		    MAX_INPUTS, id_buf, &written);
		assert(eres == FSM_GETENDIDS_FOUND);

		bool found = false;
		for (size_t i = 0; i < written; i++) {
			if (id_buf[i] == id) {
				found = true;
				break;
			}
		}
		assert(found);
	}

	/* Release both the string set and the FSM built from it. */
	re_strings_free(s);
	fsm_free(fsm);

	return EXIT_SUCCESS;
}
11 changes: 11 additions & 0 deletions tests/re_strings/testutil.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef TESTUTIL_H
#define TESTUTIL_H

#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Run the re_strings checks over a NULL-terminated array of strings:
 * each string must match the built FSM and report the end id equal to
 * its position in the array. Returns EXIT_SUCCESS; failed checks abort
 * through assert().
 */
int run_test(const char **strings);

#endif

0 comments on commit 986144b

Please sign in to comment.