Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance improvements for epsilon removal and determinisation #441

Merged
Merged
129 changes: 109 additions & 20 deletions src/adt/edgeset.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <inttypes.h>

#define LOG_BITSET 0
#define LOG_BSEARCH 0

#include "libfsm/internal.h" /* XXX: for allocating struct fsm_edge, and the edges array */

Expand Down Expand Up @@ -184,6 +185,100 @@ edge_set_advise_growth(struct edge_set **pset, const struct fsm_alloc *alloc,
return 1;
}

/* Result of find_state_position; says how to interpret the index
 * that function writes to *dst. */
enum fsp_res {
/* state was not found: *dst is the index where it would be inserted,
 * which may be one past the last group. */
FSP_FOUND_INSERT_POSITION,
/* state was found: *dst is the index of the group whose .to == state. */
FSP_FOUND_VALUE_PRESENT,
};

/* Use binary search to find the first position N where set->groups[N].to >= state,
* which includes the position immediately following the last entry. The position
* is written to *dst. Return an enum which indicates whether state is already
* present at that position. */
static enum fsp_res
find_state_position(const struct edge_set *set, fsm_state_t state, size_t *dst)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we use this in edge_set_find and edge_set_contains too?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can binary search here because the struct edge_group array is sorted by .to and we're searching by destination state, but edge_set_find and edge_set_contains are searching by edge label. If they were frequently searched and it became a hotspot in the profile we could do linear-time reindexing and bsearch on those, but currently they don't even show up in the profile. As far as I can tell they're only ever called from inside fsm_walk2 and the minimisation test oracle.

{
	/*
	 * Locate `state` among set->groups[0 .. set->count), which must be
	 * unique and sorted ascending by .to. Writes to *dst either the
	 * index of the matching group (FSP_FOUND_VALUE_PRESENT) or the
	 * index where a group for `state` would have to be inserted, which
	 * may be set->count (FSP_FOUND_INSERT_POSITION).
	 */
	size_t lo = 0, hi = set->count;
	if (LOG_BSEARCH) {
		fprintf(stderr, "%s: looking for %d in %p (count %zu)\n",
		    __func__, state, (void *)set, set->count);
	}

#if EXPENSIVE_CHECKS
	/* invariant: input is unique and sorted */
	for (size_t i = 1; i < set->count; i++) {
		assert(set->groups[i - 1].to < set->groups[i].to);
	}
#endif

	if (set->count == 0) {
		if (LOG_BSEARCH) {
			fprintf(stderr, "%s: empty, returning 0\n", __func__);
		}
		*dst = 0;
		return FSP_FOUND_INSERT_POSITION;
	} else {
		if (LOG_BSEARCH) {
			fprintf(stderr, "%s: fast path: looking for %d, set->groups[last].to %d\n",
			    __func__, state, set->groups[hi - 1].to);
		}

		/* Check the last entry so we can append in constant time. */
		const fsm_state_t last = set->groups[hi - 1].to;
		if (state > last) {
			*dst = hi;
			return FSP_FOUND_INSERT_POSITION;
		} else if (state == last) {
			*dst = hi - 1;
			return FSP_FOUND_VALUE_PRESENT;
		}
	}

	/* The set is non-empty and state < last here, so the loop below
	 * always runs at least once and assigns mid; it is initialised
	 * anyway so that no path can read an indeterminate value. */
	size_t mid = lo;
	while (lo < hi) {		/* lo <= mid < hi */
		mid = lo + (hi - lo)/2;	/* avoid overflow */
		const struct edge_group *eg = &set->groups[mid];
		const fsm_state_t cur = eg->to;
		if (LOG_BSEARCH) {
			fprintf(stderr, "%s: lo %zu, hi %zu, mid %zu, cur %d, looking for %d\n",
			    __func__, lo, hi, mid, cur, state);
		}

		if (state == cur) {
			*dst = mid;
			return FSP_FOUND_VALUE_PRESENT;
		} else if (state > cur) {
			lo = mid + 1;
			if (LOG_BSEARCH) {
				/* lo is a size_t, so %zu rather than %zd */
				fprintf(stderr, "%s: new lo %zu\n", __func__, lo);
			}

			/* Update mid if we're about to halt, because we're looking
			 * for the first position >= state, not the last position <=. */
			if (lo == hi) {
				mid = lo;
				if (LOG_BSEARCH) {
					fprintf(stderr, "%s: special case, updating mid to %zu\n", __func__, mid);
				}
			}
		} else {
			/* state < cur: cur's slot remains a candidate insert
			 * position, so it becomes the exclusive upper bound. */
			hi = mid;
			if (LOG_BSEARCH) {
				fprintf(stderr, "%s: new hi %zu\n", __func__, hi);
			}
		}
	}

	if (LOG_BSEARCH) {
		fprintf(stderr, "%s: halting at %zu (looking for %d, cur %d)\n",
		    __func__, mid, state, set->groups[mid].to);
	}

	/* mid is now the first position > state (== case is handled above),
	 * which may be one past the end of the array. */
	assert(mid == set->count || set->groups[mid].to > state);
	*dst = mid;
	return FSP_FOUND_INSERT_POSITION;
}

int
edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc,
uint64_t symbols[256/64], fsm_state_t state)
Expand Down Expand Up @@ -223,30 +318,24 @@ edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc,
assert(set->count <= set->ceil);

#if LOG_BITSET
fprintf(stderr, " -- edge_set_add: symbols [0x%lx, 0x%lx, 0x%lx, 0x%lx] -> state %d on %p\n",
symbols[0], symbols[1], symbols[2], symbols[3],
state, (void *)set);
fprintf(stderr, " -- edge_set_add: symbols [0x%lx, 0x%lx, 0x%lx, 0x%lx] -> state %d on %p\n",
symbols[0], symbols[1], symbols[2], symbols[3],
state, (void *)set);
#endif

/* Linear search for a group with the same destination
* state, or the position where that group would go. */
for (i = 0; i < set->count; i++) {
switch (find_state_position(set, state, &i)) {
case FSP_FOUND_VALUE_PRESENT:
assert(i < set->count);
eg = &set->groups[i];

if (eg->to == state) {
/* This API does not indicate whether that
* symbol -> to edge was already present. */
size_t i;
for (i = 0; i < 256/64; i++) {
eg->symbols[i] |= symbols[i];
}
dump_edge_set(set);
return 1;
} else if (eg->to > state) {
break; /* will shift down and insert below */
} else {
continue;
for (i = 0; i < 256/64; i++) {
eg->symbols[i] |= symbols[i];
}
dump_edge_set(set);
return 1;

break;
case FSP_FOUND_INSERT_POSITION:
break; /* continue below */
}

/* insert/append at i */
Expand Down
32 changes: 21 additions & 11 deletions src/adt/stateset.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
struct state_set {
const struct fsm_alloc *alloc;
fsm_state_t *a;
size_t i;
size_t n;
size_t i; /* used */
size_t n; /* ceil */
};

int
Expand Down Expand Up @@ -138,7 +138,8 @@ state_set_cmp(const struct state_set *a, const struct state_set *b)
}

/*
* Return where an item would be, if it were inserted
* Return where an item would be, if it were inserted.
* When insertion would append this returns one past the array.
*/
static size_t
state_set_search(const struct state_set *set, fsm_state_t state)
Expand All @@ -150,6 +151,11 @@ state_set_search(const struct state_set *set, fsm_state_t state)
assert(!IS_SINGLETON(set));
assert(set->a != NULL);

/* fast path: append case */
if (set->i > 0 && state > set->a[set->i - 1]) {
return set->i;
}

start = mid = 0;
end = set->i;

Expand All @@ -161,6 +167,12 @@ state_set_search(const struct state_set *set, fsm_state_t state)
end = mid;
} else if (r > 0) {
start = mid + 1;
/* update mid if we're about to halt, because
* we're looking for the first position >= state,
* not the last position <= */
if (start == end) {
mid = start;
}
} else {
return mid;
}
Expand Down Expand Up @@ -242,7 +254,7 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc,
*/
if (!state_set_empty(set)) {
i = state_set_search(set, state);
if (set->a[i] == state) {
if (i < set->i && set->a[i] == state) {
return 1;
}
}
Expand All @@ -261,11 +273,7 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc,
set->n *= 2;
}

if (state_set_cmpval(state, set->a[i]) > 0) {
i++;
}

if (i <= set->i) {
if (i < set->i) {
memmove(&set->a[i + 1], &set->a[i], (set->i - i) * (sizeof *set->a));
}

Expand All @@ -276,6 +284,8 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc,
set->i = 1;
}

/* This assert can be pretty expensive in -O0 but in -O3 it has very
* little impact on the overall runtime. */
assert(state_set_contains(set, state));

return 1;
Expand Down Expand Up @@ -470,7 +480,7 @@ state_set_remove(struct state_set **setp, fsm_state_t state)
}

i = state_set_search(set, state);
if (set->a[i] == state) {
if (i < set->i && set->a[i] == state) {
if (i < set->i) {
memmove(&set->a[i], &set->a[i + 1], (set->i - i - 1) * (sizeof *set->a));
}
Expand Down Expand Up @@ -524,7 +534,7 @@ state_set_contains(const struct state_set *set, fsm_state_t state)
}

i = state_set_search(set, state);
if (set->a[i] == state) {
if (i < set->i && set->a[i] == state) {
return 1;
}

Expand Down
95 changes: 77 additions & 18 deletions src/libfsm/determinise.c
Original file line number Diff line number Diff line change
Expand Up @@ -2016,28 +2016,87 @@ static void
sort_and_dedup_dst_buf(fsm_state_t *buf, size_t *used)
{
const size_t orig_used = *used;
qsort(buf, orig_used, sizeof(buf[0]), cmp_fsm_state_t);

/* squash out duplicates */
size_t rd = 1;
size_t wr = 1;
while (rd < orig_used) {
if (buf[rd - 1] == buf[rd]) {
rd++; /* skip */
} else {
buf[wr] = buf[rd];
rd++;
wr++;
}

if (orig_used <= 1) {
return; /* no change */
}

*used = wr;
#if EXPENSIVE_CHECKS
assert(wr <= orig_used);
for (size_t i = 1; i < *used; i++) {
assert(buf[i - 1] < buf[i]);
/* Figure out what the min and max values are, because
* when the difference between them is not too large it
* can be significantly faster to avoid qsort here. */
fsm_state_t min = (fsm_state_t)-1;
fsm_state_t max = 0;
for (size_t i = 0; i < orig_used; i++) {
const fsm_state_t cur = buf[i];
if (cur < min) { min = cur; }
if (cur > max) { max = cur; }
}

/* If there's only one unique value, then we're done. */
if (min == max) {
buf[0] = min;
*used = 1;
return;
}

/* 81920 = 10 KB buffer on the stack. This must be divisible by 64.
* Set to 0 to disable. */
#define QSORT_CUTOFF 81920

if (QSORT_CUTOFF == 0 || max - min > QSORT_CUTOFF) {
/* If the bitset would be very large but sparse due to
* extreme values, then fall back on using qsort and
* then sweeping over the array to squash out
* duplicates. */
qsort(buf, orig_used, sizeof(buf[0]), cmp_fsm_state_t);

/* squash out duplicates */
size_t rd = 1;
size_t wr = 1;
while (rd < orig_used) {
if (buf[rd - 1] == buf[rd]) {
rd++; /* skip */
} else {
buf[wr] = buf[rd];
rd++;
wr++;
}
}

*used = wr;
#if EXPENSIVE_CHECKS
assert(wr <= orig_used);
for (size_t i = 1; i < *used; i++) {
assert(buf[i - 1] < buf[i]);
}
#endif
} else {
/* Convert the array into a bitset and back, which sorts
* and deduplicates in the process. Add 1 to avoid a
* zero-length array error if QSORT_CUTOFF is 0. */
uint64_t bitset[QSORT_CUTOFF/64 + 1];
const size_t words = u64bitset_words(max - min + 1);
memset(bitset, 0x00, words * sizeof(bitset[0]));

for (size_t i = 0; i < orig_used; i++) {
u64bitset_set(bitset, buf[i] - min);
}

size_t dst = 0;
for (size_t i = 0; i < words; i++) {
const uint64_t w = bitset[i];
if (w != 0) { /* skip empty words */
uint64_t bit = 0x1;
for (size_t b_i = 0; b_i < 64; b_i++, bit <<= 1) {
if (w & bit) {
buf[dst] = 64*i + b_i + min;
dst++;
}
}
}
}
*used = dst;
}
}

static int
Expand Down
4 changes: 2 additions & 2 deletions src/libre/ast_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -208,11 +208,11 @@ addedge_literal(struct comp_env *env, enum re_flags re_flags,
assert(to < env->fsm->statecount);

if (re_flags & RE_ICASE) {
if (!fsm_addedge_literal(fsm, from, to, tolower((unsigned char) c))) {
if (!fsm_addedge_literal(fsm, from, to, (char)tolower((unsigned char) c))) {
return 0;
}

if (!fsm_addedge_literal(fsm, from, to, toupper((unsigned char) c))) {
if (!fsm_addedge_literal(fsm, from, to, (char)toupper((unsigned char) c))) {
return 0;
}
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/retest/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ parse_escapes(char *s, char **errpos, int *lenp)

ndig++;
} else {
s[j++] = ccode;
s[j++] = (char)ccode;
st = ST_BARE;

if (!hexcurly) {
Expand Down