Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add endid support to re_strings. #464

Merged
merged 6 commits into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ SUBDIR += tests/pcre-flags
SUBDIR += tests/pcre-repeat
SUBDIR += tests/pred
SUBDIR += tests/re_literal
SUBDIR += tests/re_strings
SUBDIR += tests/reverse
SUBDIR += tests/trim
SUBDIR += tests/union
Expand Down
6 changes: 6 additions & 0 deletions include/fsm/fsm.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,12 @@ fsm_setend(struct fsm *fsm, fsm_state_t state, int end);
int
fsm_setendid(struct fsm *fsm, fsm_end_id_t id);

/* Associate a numeric ID with a specific end state in an fsm.
 *
 * Unlike fsm_setendid() above, which takes no state argument, the ID
 * is applied to end_state only.
 *
 * Returns 1 on success, 0 on error.
 */
int
fsm_setendidstate(struct fsm *fsm, fsm_state_t end_state, fsm_end_id_t id);

/* Get the end IDs associated with an end state, if any.
* If id_buf has enough cells to store all the end IDs (according
* to id_buf_count) then they are written into id_buf[] and
Expand Down
6 changes: 3 additions & 3 deletions include/re/strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#ifndef RE_STRINGS_H
#define RE_STRINGS_H

struct fsm;
#include <fsm/fsm.h>
struct fsm_options;

struct re_strings;
Expand Down Expand Up @@ -42,10 +42,10 @@ void
re_strings_free(struct re_strings *g);

int
re_strings_add_raw(struct re_strings *g, const void *p, size_t n);
re_strings_add_raw(struct re_strings *g, const void *p, size_t n, const fsm_end_id_t *endid);

int
re_strings_add_str(struct re_strings *g, const char *s);
re_strings_add_str(struct re_strings *g, const char *s, const fsm_end_id_t *endid);

struct fsm *
re_strings_build(struct re_strings *g,
Expand Down
10 changes: 10 additions & 0 deletions src/libfsm/endids.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,16 @@ fsm_setendid(struct fsm *fsm, fsm_end_id_t id)
return 1;
}

/* Attach end ID `id` to the single state `end_state`.
 *
 * Thin wrapper over fsm_endid_set(): every result other than
 * FSM_ENDID_SET_ERROR_ALLOC_FAIL is reported as success.
 *
 * Returns 1 on success, 0 on allocation failure.
 */
int
fsm_setendidstate(struct fsm *fsm, fsm_state_t end_state, fsm_end_id_t id)
{
	const int alloc_failed =
	    fsm_endid_set(fsm, end_state, id) == FSM_ENDID_SET_ERROR_ALLOC_FAIL;

	return alloc_failed ? 0 : 1;
}

enum fsm_getendids_res
fsm_getendids(const struct fsm *fsm, fsm_state_t end_state,
size_t id_buf_count, fsm_end_id_t *id_buf,
Expand Down
1 change: 1 addition & 0 deletions src/libfsm/libfsm.syms
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ fsm_getendids
fsm_setendid
fsm_mapendids
fsm_increndids
fsm_setendidstate

fsm_countedges
fsm_countstates
Expand Down
26 changes: 24 additions & 2 deletions src/libre/ac.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <ctype.h>

#include <fsm/fsm.h>
#include <adt/stateset.h>

#include "ac.h"

Expand All @@ -21,6 +22,9 @@ enum { POOL_BLOCK_SIZE = 256 };
struct trie_state {
struct trie_state *children[256];
struct trie_state *fail;
/* use a state set as an endid set */
struct state_set *endids;

fsm_state_t st;
unsigned int index;
unsigned int output:1;
Expand Down Expand Up @@ -73,6 +77,7 @@ newstate(struct trie_graph *g)
st->index = ++g->nstates;

st->output = 0;
st->endids = NULL;

return st;
}
Expand All @@ -86,6 +91,10 @@ cleanup_pool(struct trie_graph *g)
p = g->pool;
g->pool = p->next;

for (size_t i = 0; i < p->n; i++) {
state_set_free(p->states[i].endids);
}

free(p->states);
free(p);
}
Expand Down Expand Up @@ -126,7 +135,7 @@ trie_create(void)
}

struct trie_graph *
trie_add_word(struct trie_graph *g, const char *w, size_t n)
trie_add_word(struct trie_graph *g, const char *w, size_t n, const fsm_end_id_t *endid)
katef marked this conversation as resolved.
Show resolved Hide resolved
{
struct trie_state *st;
size_t i;
Expand Down Expand Up @@ -159,6 +168,9 @@ trie_add_word(struct trie_graph *g, const char *w, size_t n)
g->depth = n;
}

if (endid != NULL) {
state_set_add(&st->endids, NULL, (fsm_state_t)*endid);
}
return g;
}

Expand Down Expand Up @@ -278,7 +290,7 @@ trie_to_fsm_state(struct trie_state *ts, struct fsm *fsm,
assert(fsm != NULL);
assert(q != NULL);

if (ts->output && have_end) {
if (ts->output && have_end && state_set_empty(ts->endids)) {
*q = single_end;
return 1;
}
Expand Down Expand Up @@ -315,6 +327,16 @@ trie_to_fsm_state(struct trie_state *ts, struct fsm *fsm,

if (ts->output) {
fsm_setend(fsm, st, 1);

struct state_iter si;
fsm_state_t state;
state_set_reset(ts->endids, &si);
while (state_set_next(&si, &state)) {
fsm_end_id_t endid = (fsm_end_id_t)state;
if (!fsm_setendidstate(fsm, st, endid)) {
return 0;
}
}
}

*q = st;
Expand Down
5 changes: 4 additions & 1 deletion src/libre/ac.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#ifndef AC_H
#define AC_H

#include "fsm/fsm.h"

struct fsm;
struct fsm_state;
struct fsm_options;
Expand All @@ -20,7 +22,8 @@ void
trie_free(struct trie_graph *g);

struct trie_graph *
trie_add_word(struct trie_graph *g, const char *w, size_t n);
trie_add_word(struct trie_graph *g, const char *w, size_t n,
const fsm_end_id_t *endid);

int
trie_add_failure_edges(struct trie_graph *g);
Expand Down
10 changes: 5 additions & 5 deletions src/libre/re_strings.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ re_strings(const struct fsm_options *opt, const char *a[], size_t n,
}

for (i = 0; i < n; i++) {
if (!re_strings_add_str(g, a[i])) {
if (!re_strings_add_str(g, a[i], NULL)) {
goto error;
}
}
Expand Down Expand Up @@ -64,20 +64,20 @@ re_strings_free(struct re_strings *g)
}

int
re_strings_add_raw(struct re_strings *g, const void *p, size_t n)
re_strings_add_raw(struct re_strings *g, const void *p, size_t n, const fsm_end_id_t *endid)
{
assert(p != NULL);
assert(n > 0);

return trie_add_word((struct trie_graph *) g, p, n) != NULL;
return trie_add_word((struct trie_graph *) g, p, n, endid) != NULL;
}

int
re_strings_add_str(struct re_strings *g, const char *s)
re_strings_add_str(struct re_strings *g, const char *s, const fsm_end_id_t *endid)
{
assert(s != NULL);

return re_strings_add_raw(g, s, strlen(s));
return re_strings_add_raw(g, s, strlen(s), endid);
}

struct fsm *
Expand Down
26 changes: 26 additions & 0 deletions tests/re_strings/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
.include "../../share/mk/top.mk"

TEST.tests/re_strings != ls -1 tests/re_strings/re_strings*.c
TEST_SRCDIR.tests/re_strings = tests/re_strings
TEST_OUTDIR.tests/re_strings = ${BUILD}/tests/re_strings

# One iteration per tests/re_strings/re_strings<n>.c. ${n} is what is left of
# each file name after dropping the directory (:T), the suffix (:R) and the
# leading "re_strings" (:C/^re_strings//).
.for n in ${TEST.tests/re_strings:T:R:C/^re_strings//}
# The aggregate "test" target depends on this test's result file.
test:: ${TEST_OUTDIR.tests/re_strings}/res${n}
SRC += ${TEST_SRCDIR.tests/re_strings}/re_strings${n}.c
# Compile the test driver with NDEBUG undefined.
CFLAGS.${TEST_SRCDIR.tests/re_strings}/re_strings${n}.c = -UNDEBUG

# Link the driver object and the shared testutil object with libfsm and libre.
${TEST_OUTDIR.tests/re_strings}/run${n}: ${TEST_OUTDIR.tests/re_strings}/re_strings${n}.o ${TEST_OUTDIR.tests/re_strings}/testutil.o
${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/re_strings}/run${n} ${TEST_OUTDIR.tests/re_strings}/re_strings${n}.o ${TEST_OUTDIR.tests/re_strings}/testutil.o ${BUILD}/lib/libfsm.a ${BUILD}/lib/libre.a

# Rebuild the driver object when the shared test header changes.
${TEST_OUTDIR.tests/re_strings}/re_strings${n}.o: tests/re_strings/testutil.h

# Run the test binary with its stdout sent to stderr; res${n} receives PASS
# or FAIL according to the exit status.
${TEST_OUTDIR.tests/re_strings}/res${n}: ${TEST_OUTDIR.tests/re_strings}/run${n}
( ${TEST_OUTDIR.tests/re_strings}/run${n} 1>&2 && echo PASS || echo FAIL ) > ${TEST_OUTDIR.tests/re_strings}/res${n}

# Relink the test binary whenever one of the static libraries is rebuilt.
.for lib in ${LIB:Mlibfsm} ${LIB:Mlibre}
${TEST_OUTDIR.tests/re_strings}/run${n}: ${BUILD}/lib/${lib:R}.a
.endfor
.endfor

# Build the shared test helper with assertions enabled, like the
# re_strings*.c drivers above: -UNDEBUG comes after ${CFLAGS} so it wins
# if the build flags define NDEBUG. Also rebuild when testutil.h changes.
${TEST_OUTDIR.tests/re_strings}/testutil.o: tests/re_strings/testutil.c tests/re_strings/testutil.h
	${CC} ${CFLAGS} -UNDEBUG -c -o ${TEST_OUTDIR.tests/re_strings}/testutil.o tests/re_strings/testutil.c
21 changes: 21 additions & 0 deletions tests/re_strings/re_strings1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include "testutil.h"

/* Test input: every two-letter string over the alphabet {a, b, c}.
 * The list is NULL-terminated for run_test(). */
const char *strings[] = {
	"aa",
	"ab",
	"ac",
	"ba",
	"bb",
	"bc",
	"ca",
	"cb",
	"cc",
	NULL,
};

/* Hand the string list to run_test() and exit with its result.
 * Command-line arguments are not used. */
int
main(int argc, char **argv)
{
	const int status = run_test(strings);

	(void) argc;
	(void) argv;

	return status;
}
17 changes: 17 additions & 0 deletions tests/re_strings/re_strings2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "testutil.h"

/* Test input: the same literal repeated three times, sandwiched between
 * two distinct strings. The list is NULL-terminated for run_test(). */
const char *strings[] = {
	"first",
	"duplicate",
	"duplicate",
	"duplicate",
	"last",
	NULL,
};

/* Hand the string list to run_test() and exit with its result.
 * Command-line arguments are not used. */
int
main(int argc, char **argv)
{
	const int status = run_test(strings);

	(void) argc;
	(void) argv;

	return status;
}
15 changes: 15 additions & 0 deletions tests/re_strings/re_strings3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "testutil.h"

/* Test input: nothing but three copies of one literal.
 * The list is NULL-terminated for run_test(). */
const char *strings[] = {
	"duplicate",
	"duplicate",
	"duplicate",
	NULL,
};

/* Hand the string list to run_test() and exit with its result.
 * Command-line arguments are not used. */
int
main(int argc, char **argv)
{
	const int status = run_test(strings);

	(void) argc;
	(void) argv;

	return status;
}
13 changes: 13 additions & 0 deletions tests/re_strings/re_strings4.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "testutil.h"

/* Test input: an empty list, consisting of the NULL terminator only. */
const char *strings[] = {
	/* empty */
	NULL,
};

/* Hand the (empty) string list to run_test() and exit with its result.
 * Command-line arguments are not used. */
int
main(int argc, char **argv)
{
	const int status = run_test(strings);

	(void) argc;
	(void) argv;

	return status;
}
71 changes: 71 additions & 0 deletions tests/re_strings/testutil.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#include "testutil.h"

#include <stdbool.h>
#include <assert.h>

#include "fsm/fsm.h"
#include "fsm/options.h"

#include "re/re.h"
#include "re/strings.h"

static struct fsm_options opt;

#define MAX_INPUTS 100
static fsm_end_id_t id_buf[MAX_INPUTS];

/* Build an unanchored matcher from a NULL-terminated list of literal
 * strings, tagging the i-th string with end ID i, then check that
 * executing each string reaches an end state whose end IDs include
 * that string's own ID.
 *
 * Every check is an explicit test rather than an assert(), so the
 * outcome does not depend on whether this file is built with NDEBUG.
 *
 * At most MAX_INPUTS strings are supported, because the end IDs of a
 * state are read back into the fixed-size id_buf[].
 *
 * Returns EXIT_SUCCESS if all checks pass; otherwise prints a
 * diagnostic to stderr and returns EXIT_FAILURE. The re_strings
 * builder and the fsm are released on every path.
 */
int
run_test(const char **strings)
{
	int status = EXIT_FAILURE;
	struct re_strings *s = NULL;
	struct fsm *fsm = NULL;
	const char **input;
	fsm_end_id_t id;

	s = re_strings_new();
	if (s == NULL) {
		fprintf(stderr, "re_strings_new failed\n");
		goto cleanup;
	}

	/* Register each literal with its index in the list as its end ID. */
	id = 0;
	for (input = strings; *input != NULL; input++) {
		if (id >= MAX_INPUTS) {
			fprintf(stderr, "too many inputs (max %d)\n", MAX_INPUTS);
			goto cleanup;
		}

		if (!re_strings_add_str(s, *input, &id)) {
			fprintf(stderr, "re_strings_add_str failed for \"%s\"\n", *input);
			goto cleanup;
		}

		id++;
	}

	/* flags == 0: not anchored */
	fsm = re_strings_build(s, &opt, 0);
	if (fsm == NULL) {
		fprintf(stderr, "re_strings_build failed\n");
		goto cleanup;
	}

	/* Each literal string input should match, and the set of
	 * matching endids should include the expected one. */
	id = 0;
	for (input = strings; *input != NULL; input++) {
		/* fsm_sgetc is given a pointer to the read cursor. Use a
		 * private copy rather than a pointer into strings[], so the
		 * caller's array is left intact if the cursor is advanced
		 * while reading (NOTE(review): presumably it is; confirm
		 * against fsm_sgetc). */
		const char *cursor = *input;
		fsm_state_t end;
		size_t written = 0;
		size_t i;
		bool found = false;

		if (fsm_exec(fsm, fsm_sgetc, &cursor, &end, NULL) <= 0) {
			fprintf(stderr, "no match for \"%s\"\n", *input);
			goto cleanup;
		}

		if (fsm_getendids(fsm, end, MAX_INPUTS, id_buf, &written)
		    != FSM_GETENDIDS_FOUND) {
			fprintf(stderr, "no end IDs found for \"%s\"\n", *input);
			goto cleanup;
		}

		for (i = 0; i < written; i++) {
			if (id_buf[i] == id) {
				found = true;
				break;
			}
		}

		if (!found) {
			fprintf(stderr, "end ID %lu missing for \"%s\"\n",
			    (unsigned long) id, *input);
			goto cleanup;
		}

		id++;
	}

	status = EXIT_SUCCESS;

cleanup:
	if (s != NULL) {
		re_strings_free(s);
	}
	if (fsm != NULL) {
		fsm_free(fsm);
	}

	return status;
}
11 changes: 11 additions & 0 deletions tests/re_strings/testutil.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef TESTUTIL_H
#define TESTUTIL_H

#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>

/* Shared driver for the re_strings tests.
 *
 * `strings` is a NULL-terminated array of literal strings. Each string
 * is added to a re_strings builder with its index in the array as its
 * end ID, an unanchored fsm is built, and every string is then executed
 * against it to check that the end state reached carries that string's
 * end ID.
 *
 * Returns EXIT_SUCCESS when every check passes.
 */
int
run_test(const char **strings);

#endif
Loading