Skip to content

Commit

Permalink
Merge branch 'main' into sv/integrate-combinable-DFA-capture-resolution
Browse files Browse the repository at this point in the history
There were merge conflicts, which had to do with the return type of
idmap_iter's callback -- on `main` it was `void`, on the branch it was
`int` and indicated whether iterations should continue or halt. I picked
the `int` form, and updated `idmap_iter` so that it was actually
checked, because it probably doesn't make sense to continue iterating
when earlier steps have errored out.
  • Loading branch information
silentbicycle committed Sep 27, 2023
2 parents 722260a + 1ca3726 commit 5ece637
Show file tree
Hide file tree
Showing 7 changed files with 223 additions and 56 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -409,8 +409,15 @@ jobs:
path: ${{ env.build }}
key: build-${{ matrix.make }}-${{ matrix.os }}-${{ matrix.cc }}-${{ matrix.debug }}-${{ matrix.san }}-${{ github.sha }}

# note we do the fuzzing unconditionally; each run adds to the corpus
# note we do the fuzzing unconditionally; each run adds to the corpus.
#
# We only run fuzzing for PRs in the base repo, this prevents attempting
# to purge the seed cache from a PR syncing a forked repo, which fails
# due to a permissions error (I'm unsure why, I think PRs from clones can't
# purge a cache in CI presumably for security/DoS reasons). PRs from clones
# still run fuzzing, just from empty, and do not save their seeds.
- name: Restore seeds (mode ${{ matrix.mode }})
if: github.repository == 'katef/libfsm'
uses: actions/cache/restore@v3
id: cache-seeds
with:
Expand Down
2 changes: 1 addition & 1 deletion include/adt/idmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ idmap_get(const struct idmap *m, fsm_state_t state_id,
size_t buf_size, unsigned *buf, size_t *written);

/* Iterator callback.
* The return value indicates whether iteration should continue. */
* Return status indicates whether to continue. */
typedef int
idmap_iter_fun(fsm_state_t state_id, unsigned value, void *opaque);

Expand Down
129 changes: 109 additions & 20 deletions src/adt/edgeset.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <inttypes.h>

#define LOG_BITSET 0
#define LOG_BSEARCH 0

#include "libfsm/internal.h" /* XXX: for allocating struct fsm_edge, and the edges array */

Expand Down Expand Up @@ -184,6 +185,100 @@ edge_set_advise_growth(struct edge_set **pset, const struct fsm_alloc *alloc,
return 1;
}

enum fsp_res {
FSP_FOUND_INSERT_POSITION,
FSP_FOUND_VALUE_PRESENT,
};

/* Use binary search to find the first position N where set->groups[N].to >= state,
* which includes the position immediately following the last entry. Return an enum
* which indicates whether state is already present. */
static enum fsp_res
find_state_position(const struct edge_set *set, fsm_state_t state, size_t *dst)
{
size_t lo = 0, hi = set->count;
if (LOG_BSEARCH) {
fprintf(stderr, "%s: looking for %d in %p (count %zu)\n",
__func__, state, (void *)set, set->count);
}

#if EXPENSIVE_CHECKS
/* invariant: input is unique and sorted */
for (size_t i = 1; i < set->count; i++) {
assert(set->groups[i - 1].to < set->groups[i].to);
}
#endif

if (set->count == 0) {
if (LOG_BSEARCH) {
fprintf(stderr, "%s: empty, returning 0\n", __func__);
}
*dst = 0;
return FSP_FOUND_INSERT_POSITION;
} else {
if (LOG_BSEARCH) {
fprintf(stderr, "%s: fast path: looking for %d, set->groups[last].to %d\n",
__func__, state, set->groups[hi - 1].to);
}

/* Check the last entry so we can append in constant time. */
const fsm_state_t last = set->groups[hi - 1].to;
if (state > last) {
*dst = hi;
return FSP_FOUND_INSERT_POSITION;
} else if (state == last) {
*dst = hi - 1;
return FSP_FOUND_VALUE_PRESENT;
}
}

size_t mid;
while (lo < hi) { /* lo <= mid < hi */
mid = lo + (hi - lo)/2; /* avoid overflow */
const struct edge_group *eg = &set->groups[mid];
const fsm_state_t cur = eg->to;
if (LOG_BSEARCH) {
fprintf(stderr, "%s: lo %zu, hi %zu, mid %zu, cur %d, looking for %d\n",
__func__, lo, hi, mid, cur, state);
}

if (state == cur) {
*dst = mid;
return FSP_FOUND_VALUE_PRESENT;
} else if (state > cur) {
lo = mid + 1;
if (LOG_BSEARCH) {
fprintf(stderr, "%s: new lo %zd\n", __func__, lo);
}

/* Update mid if we're about to halt, because we're looking
* for the first position >= state, not the last position <=. */
if (lo == hi) {
mid = lo;
if (LOG_BSEARCH) {
fprintf(stderr, "%s: special case, updating mid to %zd\n", __func__, mid);
}
}
} else if (state < cur) {
hi = mid;
if (LOG_BSEARCH) {
fprintf(stderr, "%s: new hi %zd\n", __func__, hi);
}
}
}

if (LOG_BSEARCH) {
fprintf(stderr, "%s: halting at %zd (looking for %d, cur %d)\n",
__func__, mid, state, set->groups[mid].to);
}

/* dst is now the first position > state (== case is handled above),
* which may be one past the end of the array. */
assert(mid == set->count || set->groups[mid].to > state);
*dst = mid;
return FSP_FOUND_INSERT_POSITION;
}

int
edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc,
uint64_t symbols[256/64], fsm_state_t state)
Expand Down Expand Up @@ -223,30 +318,24 @@ edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc,
assert(set->count <= set->ceil);

#if LOG_BITSET
fprintf(stderr, " -- edge_set_add: symbols [0x%lx, 0x%lx, 0x%lx, 0x%lx] -> state %d on %p\n",
symbols[0], symbols[1], symbols[2], symbols[3],
state, (void *)set);
fprintf(stderr, " -- edge_set_add: symbols [0x%lx, 0x%lx, 0x%lx, 0x%lx] -> state %d on %p\n",
symbols[0], symbols[1], symbols[2], symbols[3],
state, (void *)set);
#endif

/* Linear search for a group with the same destination
* state, or the position where that group would go. */
for (i = 0; i < set->count; i++) {
switch (find_state_position(set, state, &i)) {
case FSP_FOUND_VALUE_PRESENT:
assert(i < set->count);
eg = &set->groups[i];

if (eg->to == state) {
/* This API does not indicate whether that
* symbol -> to edge was already present. */
size_t i;
for (i = 0; i < 256/64; i++) {
eg->symbols[i] |= symbols[i];
}
dump_edge_set(set);
return 1;
} else if (eg->to > state) {
break; /* will shift down and insert below */
} else {
continue;
for (i = 0; i < 256/64; i++) {
eg->symbols[i] |= symbols[i];
}
dump_edge_set(set);
return 1;

break;
case FSP_FOUND_INSERT_POSITION:
break; /* continue below */
}

/* insert/append at i */
Expand Down
8 changes: 6 additions & 2 deletions src/adt/idmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,9 @@ idmap_iter(const struct idmap *m,
for (uint64_t b_i = 0; b_i < 64; b_i++) {
if (w & ((uint64_t)1 << b_i)) {
const unsigned v = 64*w_i + b_i;
cb(b->state, v, opaque);
if (!cb(b->state, v, opaque)) {
return;
}
}
}
}
Expand Down Expand Up @@ -371,7 +373,9 @@ idmap_iter_for_state(const struct idmap *m, fsm_state_t state_id,

if (w & ((uint64_t)1 << b_i)) {
const unsigned v = 64*w_i + b_i;
cb(b->state, v, opaque);
if (!cb(b->state, v, opaque)) {
return;
}
block_count++;
}
b_i++;
Expand Down
34 changes: 21 additions & 13 deletions src/adt/stateset.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@
struct state_set {
const struct fsm_alloc *alloc;
fsm_state_t *a;
size_t i;
size_t n;
size_t i; /* used */
size_t n; /* ceil */
};

int
Expand Down Expand Up @@ -143,7 +143,8 @@ state_set_cmp(const struct state_set *a, const struct state_set *b)
}

/*
* Return where an item would be, if it were inserted
* Return where an item would be, if it were inserted.
* When insertion would append this returns one past the array.
*/
static size_t
state_set_search(const struct state_set *set, fsm_state_t state)
Expand All @@ -155,6 +156,11 @@ state_set_search(const struct state_set *set, fsm_state_t state)
assert(!IS_SINGLETON(set));
assert(set->a != NULL);

/* fast path: append case */
if (set->i > 0 && state > set->a[set->i - 1]) {
return set->i;
}

start = mid = 0;
end = set->i;

Expand All @@ -166,6 +172,12 @@ state_set_search(const struct state_set *set, fsm_state_t state)
end = mid;
} else if (r > 0) {
start = mid + 1;
/* update mid if we're about to halt, because
* we're looking for the first position >= state,
* not the last position <= */
if (start == end) {
mid = start;
}
} else {
return mid;
}
Expand Down Expand Up @@ -247,7 +259,7 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc,
*/
if (!state_set_empty(set)) {
i = state_set_search(set, state);
if (set->a[i] == state) {
if (i < set->i && set->a[i] == state) {
return 1;
}
}
Expand All @@ -266,11 +278,7 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc,
set->n *= 2;
}

if (state_set_cmpval(state, set->a[i]) > 0) {
i++;
}

if (i <= set->i) {
if (i < set->i) {
memmove(&set->a[i + 1], &set->a[i], (set->i - i) * (sizeof *set->a));
}

Expand All @@ -281,9 +289,9 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc,
set->i = 1;
}

#if EXPENSIVE_CHECKS
/* This assert can be pretty expensive in -O0 but in -O3 it has very
* little impact on the overall runtime. */
assert(state_set_contains(set, state));
#endif

return 1;
}
Expand Down Expand Up @@ -477,7 +485,7 @@ state_set_remove(struct state_set **setp, fsm_state_t state)
}

i = state_set_search(set, state);
if (set->a[i] == state) {
if (i < set->i && set->a[i] == state) {
if (i < set->i) {
memmove(&set->a[i], &set->a[i + 1], (set->i - i - 1) * (sizeof *set->a));
}
Expand Down Expand Up @@ -533,7 +541,7 @@ state_set_contains(const struct state_set *set, fsm_state_t state)
}

i = state_set_search(set, state);
if (set->a[i] == state) {
if (i < set->i && set->a[i] == state) {
return 1;
}

Expand Down
Loading

0 comments on commit 5ece637

Please sign in to comment.