diff --git a/src/adt/edgeset.c b/src/adt/edgeset.c index c718727ca..9658213c8 100644 --- a/src/adt/edgeset.c +++ b/src/adt/edgeset.c @@ -11,6 +11,7 @@ #include #define LOG_BITSET 0 +#define LOG_BSEARCH 0 #include "libfsm/internal.h" /* XXX: for allocating struct fsm_edge, and the edges array */ @@ -184,6 +185,100 @@ edge_set_advise_growth(struct edge_set **pset, const struct fsm_alloc *alloc, return 1; } +enum fsp_res { + FSP_FOUND_INSERT_POSITION, + FSP_FOUND_VALUE_PRESENT, +}; + +/* Use binary search to find the first position N where set->groups[N].to >= state, + * which includes the position immediately following the last entry. Return an enum + * which indicates whether state is already present. */ +static enum fsp_res +find_state_position(const struct edge_set *set, fsm_state_t state, size_t *dst) +{ + size_t lo = 0, hi = set->count; + if (LOG_BSEARCH) { + fprintf(stderr, "%s: looking for %d in %p (count %zu)\n", + __func__, state, (void *)set, set->count); + } + +#if EXPENSIVE_CHECKS + /* invariant: input is unique and sorted */ + for (size_t i = 1; i < set->count; i++) { + assert(set->groups[i - 1].to < set->groups[i].to); + } +#endif + + if (set->count == 0) { + if (LOG_BSEARCH) { + fprintf(stderr, "%s: empty, returning 0\n", __func__); + } + *dst = 0; + return FSP_FOUND_INSERT_POSITION; + } else { + if (LOG_BSEARCH) { + fprintf(stderr, "%s: fast path: looking for %d, set->groups[last].to %d\n", + __func__, state, set->groups[hi - 1].to); + } + + /* Check the last entry so we can append in constant time. */ + const fsm_state_t last = set->groups[hi - 1].to; + if (state > last) { + *dst = hi; + return FSP_FOUND_INSERT_POSITION; + } else if (state == last) { + *dst = hi - 1; + return FSP_FOUND_VALUE_PRESENT; + } + } + + size_t mid; + while (lo < hi) { /* lo <= mid < hi */ + mid = lo + (hi - lo)/2; /* avoid overflow */ + const struct edge_group *eg = &set->groups[mid]; + const fsm_state_t cur = eg->to; + if (LOG_BSEARCH) { + fprintf(stderr, "%s: lo %zu, hi %zu, mid %zu, cur %d, looking for %d\n", + __func__, lo, hi, mid, cur, state); + } + + if (state == cur) { + *dst = mid; + return FSP_FOUND_VALUE_PRESENT; + } else if (state > cur) { + lo = mid + 1; + if (LOG_BSEARCH) { + fprintf(stderr, "%s: new lo %zd\n", __func__, lo); + } + + /* Update mid if we're about to halt, because we're looking + * for the first position >= state, not the last position <=. */ + if (lo == hi) { + mid = lo; + if (LOG_BSEARCH) { + fprintf(stderr, "%s: special case, updating mid to %zd\n", __func__, mid); + } + } + } else if (state < cur) { + hi = mid; + if (LOG_BSEARCH) { + fprintf(stderr, "%s: new hi %zd\n", __func__, hi); + } + } + } + + if (LOG_BSEARCH) { + fprintf(stderr, "%s: halting at %zd (looking for %d, cur %d)\n", + __func__, mid, state, set->groups[mid].to); + } + + /* dst is now the first position > state (== case is handled above), + * which may be one past the end of the array. */ + assert(mid == set->count || set->groups[mid].to > state); + *dst = mid; + return FSP_FOUND_INSERT_POSITION; +} + int edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc, uint64_t symbols[256/64], fsm_state_t state) @@ -223,30 +318,24 @@ edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc, assert(set->count <= set->ceil); #if LOG_BITSET - fprintf(stderr, " -- edge_set_add: symbols [0x%lx, 0x%lx, 0x%lx, 0x%lx] -> state %d on %p\n", - symbols[0], symbols[1], symbols[2], symbols[3], - state, (void *)set); + fprintf(stderr, " -- edge_set_add: symbols [0x%lx, 0x%lx, 0x%lx, 0x%lx] -> state %d on %p\n", + symbols[0], symbols[1], symbols[2], symbols[3], + state, (void *)set); #endif - /* Linear search for a group with the same destination - * state, or the position where that group would go. */ - for (i = 0; i < set->count; i++) { + switch (find_state_position(set, state, &i)) { + case FSP_FOUND_VALUE_PRESENT: + assert(i < set->count); eg = &set->groups[i]; - - if (eg->to == state) { - /* This API does not indicate whether that - * symbol -> to edge was already present. */ - size_t i; - for (i = 0; i < 256/64; i++) { - eg->symbols[i] |= symbols[i]; - } - dump_edge_set(set); - return 1; - } else if (eg->to > state) { - break; /* will shift down and insert below */ - } else { - continue; + for (i = 0; i < 256/64; i++) { + eg->symbols[i] |= symbols[i]; } + dump_edge_set(set); + return 1; + + break; + case FSP_FOUND_INSERT_POSITION: + break; /* continue below */ } /* insert/append at i */ diff --git a/src/adt/stateset.c b/src/adt/stateset.c index c1cff9933..8d73a9b5f 100644 --- a/src/adt/stateset.c +++ b/src/adt/stateset.c @@ -44,8 +44,8 @@ struct state_set { const struct fsm_alloc *alloc; fsm_state_t *a; - size_t i; - size_t n; + size_t i; /* used */ + size_t n; /* ceil */ }; int @@ -138,7 +138,8 @@ state_set_cmp(const struct state_set *a, const struct state_set *b) } /* - * Return where an item would be, if it were inserted + * Return where an item would be, if it were inserted. + * When insertion would append this returns one past the array. */ static size_t state_set_search(const struct state_set *set, fsm_state_t state) @@ -150,6 +151,11 @@ state_set_search(const struct state_set *set, fsm_state_t state) assert(!IS_SINGLETON(set)); assert(set->a != NULL); + /* fast path: append case */ + if (set->i > 0 && state > set->a[set->i - 1]) { + return set->i; + } + start = mid = 0; end = set->i; @@ -161,6 +167,12 @@ state_set_search(const struct state_set *set, fsm_state_t state) end = mid; } else if (r > 0) { start = mid + 1; + /* update mid if we're about to halt, because + * we're looking for the first position >= state, + * not the last position <= */ + if (start == end) { + mid = start; + } } else { return mid; } @@ -242,7 +254,7 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc, */ if (!state_set_empty(set)) { i = state_set_search(set, state); - if (set->a[i] == state) { + if (i < set->i && set->a[i] == state) { return 1; } } @@ -261,11 +273,7 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc, set->n *= 2; } - if (state_set_cmpval(state, set->a[i]) > 0) { - i++; - } - - if (i <= set->i) { + if (i < set->i) { memmove(&set->a[i + 1], &set->a[i], (set->i - i) * (sizeof *set->a)); } @@ -276,6 +284,8 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc, set->i = 1; } + /* This assert can be pretty expensive in -O0 but in -O3 it has very + * little impact on the overall runtime. */ assert(state_set_contains(set, state)); return 1; @@ -470,7 +480,7 @@ state_set_remove(struct state_set **setp, fsm_state_t state) } i = state_set_search(set, state); - if (set->a[i] == state) { + if (i < set->i && set->a[i] == state) { if (i < set->i) { memmove(&set->a[i], &set->a[i + 1], (set->i - i - 1) * (sizeof *set->a)); } @@ -524,7 +534,7 @@ state_set_contains(const struct state_set *set, fsm_state_t state) } i = state_set_search(set, state); - if (set->a[i] == state) { + if (i < set->i && set->a[i] == state) { return 1; } diff --git a/src/libfsm/determinise.c b/src/libfsm/determinise.c index 56e135afd..3dbdc429e 100644 --- a/src/libfsm/determinise.c +++ b/src/libfsm/determinise.c @@ -2016,28 +2016,87 @@ static void sort_and_dedup_dst_buf(fsm_state_t *buf, size_t *used) { const size_t orig_used = *used; - qsort(buf, orig_used, sizeof(buf[0]), cmp_fsm_state_t); - - /* squash out duplicates */ - size_t rd = 1; - size_t wr = 1; - while (rd < orig_used) { - if (buf[rd - 1] == buf[rd]) { - rd++; /* skip */ - } else { - buf[wr] = buf[rd]; - rd++; - wr++; - } + + if (orig_used <= 1) { + return; /* no change */ } - *used = wr; -#if EXPENSIVE_CHECKS - assert(wr <= orig_used); - for (size_t i = 1; i < *used; i++) { - assert(buf[i - 1] < buf[i]); + /* Figure out what the min and max values are, because + * when the difference between them is not too large it + * can be significantly faster to avoid qsort here. */ + fsm_state_t min = (fsm_state_t)-1; + fsm_state_t max = 0; + for (size_t i = 0; i < orig_used; i++) { + const fsm_state_t cur = buf[i]; + if (cur < min) { min = cur; } + if (cur > max) { max = cur; } } + + /* If there's only one unique value, then we're done. */ + if (min == max) { + buf[0] = min; + *used = 1; + return; + } + +/* 81920 = 10 KB buffer on the stack. This must be divisible by 64. + * Set to 0 to disable. */ +#define QSORT_CUTOFF 81920 + + if (QSORT_CUTOFF == 0 || max - min > QSORT_CUTOFF) { + /* If the bitset would be very large but sparse due to + * extreme values, then fall back on using qsort and + * then sweeping over the array to squash out + * duplicates. */ + qsort(buf, orig_used, sizeof(buf[0]), cmp_fsm_state_t); + + /* squash out duplicates */ + size_t rd = 1; + size_t wr = 1; + while (rd < orig_used) { + if (buf[rd - 1] == buf[rd]) { + rd++; /* skip */ + } else { + buf[wr] = buf[rd]; + rd++; + wr++; + } + } + + *used = wr; +#if EXPENSIVE_CHECKS + assert(wr <= orig_used); + for (size_t i = 1; i < *used; i++) { + assert(buf[i - 1] < buf[i]); + } #endif + } else { + /* Convert the array into a bitset and back, which sorts + * and deduplicates in the process. Add 1 to avoid a zero- + * zero-length array error if QSORT_CUTOFF is 0. */ + uint64_t bitset[QSORT_CUTOFF/64 + 1]; + const size_t words = u64bitset_words(max - min + 1); + memset(bitset, 0x00, words * sizeof(bitset[0])); + + for (size_t i = 0; i < orig_used; i++) { + u64bitset_set(bitset, buf[i] - min); + } + + size_t dst = 0; + for (size_t i = 0; i < words; i++) { + const uint64_t w = bitset[i]; + if (w != 0) { /* skip empty words */ + uint64_t bit = 0x1; + for (size_t b_i = 0; b_i < 64; b_i++, bit <<= 1) { + if (w & bit) { + buf[dst] = 64*i + b_i + min; + dst++; + } + } + } + } + *used = dst; + } } static int diff --git a/src/libre/ast_compile.c b/src/libre/ast_compile.c index 502faf8b4..aa0902d55 100644 --- a/src/libre/ast_compile.c +++ b/src/libre/ast_compile.c @@ -208,11 +208,11 @@ addedge_literal(struct comp_env *env, enum re_flags re_flags, assert(to < env->fsm->statecount); if (re_flags & RE_ICASE) { - if (!fsm_addedge_literal(fsm, from, to, tolower((unsigned char) c))) { + if (!fsm_addedge_literal(fsm, from, to, (char)tolower((unsigned char) c))) { return 0; } - if (!fsm_addedge_literal(fsm, from, to, toupper((unsigned char) c))) { + if (!fsm_addedge_literal(fsm, from, to, (char)toupper((unsigned char) c))) { return 0; } } else { diff --git a/src/retest/main.c b/src/retest/main.c index b6b4c52f7..e01c93e7c 100644 --- a/src/retest/main.c +++ b/src/retest/main.c @@ -393,7 +393,7 @@ parse_escapes(char *s, char **errpos, int *lenp) ndig++; } else { - s[j++] = ccode; + s[j++] = (char)ccode; st = ST_BARE; if (!hexcurly) {