diff --git a/.gitignore b/.gitignore index 567609b12..65d6036f6 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -build/ +/build +/target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 000000000..adadce09b --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "libfsm-rs" +version = "0.1.0" +authors = ["Kate F "] +edition = "2018" + +[lib] +crate-type = ["staticlib"] +path = "src/libfsm/lib.rs" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +libc = "0.2" diff --git a/Makefile b/Makefile index d5c14b3cc..9982082e6 100644 --- a/Makefile +++ b/Makefile @@ -92,7 +92,6 @@ INCDIR += include .include .include .include -.include .include .include .include @@ -110,3 +109,24 @@ STAGE_BUILD := ${STAGE_BUILD:Nbin/cvtpcre} grep FAIL ${BUILD}/tests/*/res*; [ $$? -ne 0 ] .endif +.if !${CC:T:Memcc*} + +./target/debug/liblibfsm_rs.a: + cargo build + +test:: + cargo test + +./target/debug/liblibfsm_rs.d: ./target/debug/liblibfsm_rs.a +.if exists(./target/debug/liblibfsm_rs.d) +.include "./target/debug/liblibfsm_rs.d" +.endif + +.for part in ${PART} +${BUILD}/lib/${part}.o: ./target/debug/liblibfsm_rs.a +# hijacking LDRFLAGS here because the target only expects .o sources +LDRFLAGS.${part} += ./target/debug/liblibfsm_rs.a +.endfor + +.endif + diff --git a/src/fsm/Makefile b/src/fsm/Makefile index 5b5f212fa..3eb6c7af4 100644 --- a/src/fsm/Makefile +++ b/src/fsm/Makefile @@ -61,6 +61,8 @@ LDD_VER!= \ LFLAGS.fsm += -lrt .endif +LFLAGS.fsm += -lpthread -ldl + .endif .endif diff --git a/src/libfsm/Makefile b/src/libfsm/Makefile index c84f504bd..fefec2ed7 100644 --- a/src/libfsm/Makefile +++ b/src/libfsm/Makefile @@ -8,13 +8,11 @@ SRC += src/libfsm/clone.c SRC += src/libfsm/closure.c SRC += src/libfsm/edge.c SRC += src/libfsm/empty.c -SRC += src/libfsm/end.c SRC += src/libfsm/endids.c SRC += src/libfsm/equal.c SRC += src/libfsm/exec.c SRC += src/libfsm/fsm.c SRC += 
src/libfsm/mode.c -SRC += src/libfsm/start.c SRC += src/libfsm/state.c SRC += src/libfsm/trim.c SRC += src/libfsm/example.c diff --git a/src/libfsm/capture.c b/src/libfsm/capture.c index c6ae38c08..726202a46 100644 --- a/src/libfsm/capture.c +++ b/src/libfsm/capture.c @@ -21,7 +21,7 @@ fsm_capture_init(struct fsm *fsm) } fsm->capture_info = ci; - for (i = 0; i < fsm->statealloc; i++) { + for (i = 0; i < fsm->statecount; i++) { fsm->states[i].has_capture_actions = 0; } diff --git a/src/libfsm/end.c b/src/libfsm/end.c deleted file mode 100644 index 5a4aa0d72..000000000 --- a/src/libfsm/end.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2008-2017 Katherine Flavel - * - * See LICENCE for the full copyright terms. - */ - -#include -#include - -#include -#include - -#include - -#include "internal.h" - -void -fsm_setend(struct fsm *fsm, fsm_state_t state, int end) -{ - (void) fsm; - - assert(fsm != NULL); - assert(state < fsm->statecount); - - if (fsm->states[state].end == !!end) { - return; - } - - switch (end) { - case 0: - assert(fsm->endcount > 0); - fsm->endcount--; - fsm->states[state].end = 0; - break; - - case 1: - assert(fsm->endcount < FSM_ENDCOUNT_MAX); - fsm->endcount++; - fsm->states[state].end = 1; - break; - } -} diff --git a/src/libfsm/fsm.c b/src/libfsm/fsm.c index 60f748184..99848c603 100644 --- a/src/libfsm/fsm.c +++ b/src/libfsm/fsm.c @@ -22,6 +22,7 @@ #include "internal.h" #include "capture.h" #include "endids.h" +#include "libfsm_rs.h" void free_contents(struct fsm *fsm) @@ -47,6 +48,9 @@ fsm_new(const struct fsm_options *opt) static const struct fsm_options defaults; struct fsm *new, f; + /* just to prove linking works */ + fsm_noop(); + if (opt == NULL) { opt = &defaults; } diff --git a/src/libfsm/fsm.rs b/src/libfsm/fsm.rs new file mode 100644 index 000000000..294d47e4a --- /dev/null +++ b/src/libfsm/fsm.rs @@ -0,0 +1,243 @@ +//! Representation of a Finite State Machine. 
+ +use libc::c_void; +use std::slice; + +// Keep in sync with include/fsm.h:fsm_state_t +type StateId = u32; + +// Opaque pointer to struct edge_set +type EdgeSet = *mut c_void; + +// Opaque pointer to struct state_set +type StateSet = *mut c_void; + +// Opaque pointer to struct fsm_capture_info +type CaptureInfo = *mut c_void; + +// Opaque pointer to struct endid_info +type EndIdInfo = *mut c_void; + +// Opaque pointer to struct fsm_options +type Options = *const c_void; + +const ENDCOUNT_MAX: usize = usize::MAX; + +/// One state in a `Fsm`'s array of states. +// Keep in sync with internal.h:struct fsm_state +#[repr(C)] +struct State { + end: bool, + + /// If false, then this state has no need for checking the fsm->capture_info struct. + has_capture_actions: bool, + + /// Meaningful within one particular transformation only. + visited: bool, + + edges: EdgeSet, + epsilons: StateSet, +} + +/// Finite State Machine. +// Keep in sync with internal.h:struct fsm +#[repr(C)] +pub struct Fsm { + /// Pointer to the array of states. + states: *mut State, + + /// Number of elements allocated. + statealloc: usize, + + /// Number of elements populated.
+ statecount: usize, + + endcount: usize, + + start: StateId, + hasstart: bool, + + capture_info: CaptureInfo, + endid_info: EndIdInfo, + opt: Options, +} + +impl Fsm { + pub fn clear_start(&mut self) { + self.hasstart = false; + } + + pub fn set_start(&mut self, state: StateId) { + assert!((state as usize) < self.statecount); + + self.start = state; + self.hasstart = true; + } + + pub fn get_start(&self) -> Option<StateId> { + if self.hasstart { + assert!((self.start as usize) < self.statecount); + Some(self.start) + } else { + None + } + } + + fn states_as_mut_slice(&mut self) -> &mut [State] { + unsafe { slice::from_raw_parts_mut(self.states, self.statecount) } // SAFETY: requires `states` to be non-null and to address `statecount` initialised elements + } + + pub fn set_end(&mut self, state: StateId, end: bool) { + assert!((state as usize) < self.statecount); + + { + // Temporary scope so the `s` mutable borrow terminates before + // we frob `self.endcount` below. + + let states = self.states_as_mut_slice(); + let s = &mut states[state as usize]; + + if s.end == end { + return; + } else { + s.end = end; + } + } + + if end { + assert!(self.endcount < ENDCOUNT_MAX); + self.endcount += 1; + } else { + assert!(self.endcount > 0); + self.endcount -= 1; + } + } +} + +#[no_mangle] // the entry points below are called from C, so they are declared `extern "C"` to use the C ABI +pub unsafe extern "C" fn fsm_clearstart(fsm: *mut Fsm) { + assert!(!fsm.is_null()); + let fsm = &mut *fsm; + + fsm.clear_start(); +} + +#[no_mangle] +pub unsafe extern "C" fn fsm_setstart(fsm: *mut Fsm, state: StateId) { + assert!(!fsm.is_null()); + let fsm = &mut *fsm; + + fsm.set_start(state); +} + +#[no_mangle] +pub unsafe extern "C" fn fsm_getstart(fsm: *const Fsm, out_start: *mut StateId) -> i32 { + assert!(!fsm.is_null()); + assert!(!out_start.is_null()); + + let fsm = &*fsm; + match fsm.get_start() { + Some(id) => { + *out_start = id; + 1 + } + + None => 0, + } +} + +#[no_mangle] +pub unsafe extern "C" fn fsm_setend(fsm: *mut Fsm, state: StateId, end: i32) { + assert!(!fsm.is_null()); + let fsm = &mut *fsm; + + let end = end != 0; // any non-zero C int counts as true + + fsm.set_end(state, end) +} + +#[cfg(test)] +mod tests { + use
super::*; + use std::ptr; + + // Just here until we can construct Fsm in Rust + fn dummy_fsm_new() -> Fsm { + Fsm { + states: ptr::null_mut(), + statealloc: 0, + statecount: 0, + endcount: 0, + start: 0, + hasstart: false, + capture_info: ptr::null_mut(), + endid_info: ptr::null_mut(), + opt: ptr::null(), + } + } + + #[test] + fn sets_and_clears_start() { + let mut fsm = dummy_fsm_new(); + + // FIXME: this sets up an inconsistent state, but we need it to + // test the assertion inside fsm.set_start(). + fsm.statecount = 1; + + fsm.set_start(0); + assert_eq!(fsm.get_start(), Some(0)); + + fsm.clear_start(); + assert!(fsm.get_start().is_none()) + } + + #[test] + fn sets_and_clears_start_from_c() { + let mut fsm = dummy_fsm_new(); + + // FIXME: this sets up an inconsistent state, but we need it to + // test the assertion inside fsm_setstart(). + fsm.statecount = 1; + let fsm_ptr = &mut fsm as *mut _; // taken after the last direct write to `fsm`, so that write cannot invalidate the pointer + + unsafe { + let mut n = 0; + + fsm_setstart(fsm_ptr, 0); + assert_eq!(fsm_getstart(fsm_ptr, &mut n), 1); + assert_eq!(n, 0); + + fsm_clearstart(fsm_ptr); + assert_eq!(fsm_getstart(fsm_ptr, &mut n), 0); + } + } + + #[test] + fn set_end_works() { + let mut fsm = dummy_fsm_new(); + + fn make_state() -> State { + State { + end: false, + has_capture_actions: false, + visited: false, + edges: ptr::null_mut(), + epsilons: ptr::null_mut(), + } + } + + let mut states = vec![make_state(), make_state(), make_state()]; + + fsm.states = states.as_mut_ptr(); + fsm.statecount = 3; + fsm.statealloc = 3; + + fsm.set_end(1, true); // writes through `fsm.states`, which points into the buffer of `states` + assert_eq!(fsm.endcount, 1); + assert!(states[1].end); + + fsm.set_end(1, false); + assert_eq!(fsm.endcount, 0); + assert!(!states[1].end); + } +} diff --git a/src/libfsm/internal.h b/src/libfsm/internal.h index a94f11141..69a1560ff 100644 --- a/src/libfsm/internal.h +++ b/src/libfsm/internal.h @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -52,20 +53,22 @@ struct fsm_edge { unsigned char symbol; }; +/* Keep in
sync with fsm.rs:State */ struct fsm_state { - unsigned int end:1; + bool end; /* If 0, then this state has no need for checking * the fsm->capture_info struct. */ - unsigned int has_capture_actions:1; + bool has_capture_actions; /* meaningful within one particular transformation only */ - unsigned int visited:1; + bool visited; struct edge_set *edges; struct state_set *epsilons; }; +/* Keep in sync with fsm.rs:Fsm */ struct fsm { struct fsm_state *states; /* array */ @@ -74,7 +77,7 @@ struct fsm { size_t endcount; fsm_state_t start; - unsigned int hasstart:1; + bool hasstart; struct fsm_capture_info *capture_info; struct endid_info *endid_info; diff --git a/src/libfsm/lib.rs b/src/libfsm/lib.rs new file mode 100644 index 000000000..e398b586b --- /dev/null +++ b/src/libfsm/lib.rs @@ -0,0 +1,5 @@ +mod fsm; + +// just here to prove linking works +#[no_mangle] +pub extern "C" fn fsm_noop() {} diff --git a/src/libfsm/libfsm.syms b/src/libfsm/libfsm.syms index 947012fd0..667aac301 100644 --- a/src/libfsm/libfsm.syms +++ b/src/libfsm/libfsm.syms @@ -126,3 +126,7 @@ fsm_capture_set_path fsm_capture_rebase_capture_id fsm_capture_alloc fsm_capture_dump + +# from rust +fsm_noop + diff --git a/src/libfsm/libfsm_rs.h b/src/libfsm/libfsm_rs.h new file mode 100644 index 000000000..d81f33857 --- /dev/null +++ b/src/libfsm/libfsm_rs.h @@ -0,0 +1,6 @@ +#ifndef LIBFSM_RS_H +#define LIBFSM_RS_H + +void fsm_noop(void); + +#endif diff --git a/src/libfsm/start.c b/src/libfsm/start.c deleted file mode 100644 index 98ef1a4db..000000000 --- a/src/libfsm/start.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2008-2017 Katherine Flavel - * - * See LICENCE for the full copyright terms. 
- */ - -#include -#include - -#include - -#include - -#include "internal.h" - -void -fsm_clearstart(struct fsm *fsm) -{ - assert(fsm != NULL); - - fsm->hasstart = 0; -} - -void -fsm_setstart(struct fsm *fsm, fsm_state_t state) -{ - assert(fsm != NULL); - assert(state < fsm->statecount); - - fsm->start = state; - fsm->hasstart = 1; -} - -int -fsm_getstart(const struct fsm *fsm, fsm_state_t *start) -{ - assert(fsm != NULL); - assert(start != NULL); - - if (!fsm->hasstart) { - return 0; - } - - assert(fsm->start < fsm->statecount); - - *start = fsm->start; - return 1; -} - diff --git a/src/libfsm/state.c b/src/libfsm/state.c index acf2bff25..24ab10619 100644 --- a/src/libfsm/state.c +++ b/src/libfsm/state.c @@ -18,16 +18,21 @@ #include "internal.h" +static void +init_state(struct fsm_state *state) +{ + state->end = 0; + state->has_capture_actions = false; + state->visited = 0; + state->epsilons = NULL; + state->edges = NULL; +} + int fsm_addstate(struct fsm *fsm, fsm_state_t *state) { assert(fsm != NULL); - if (fsm->statecount == (fsm_state_t) -1) { - errno = ENOMEM; - return 0; - } - /* TODO: something better than one contigious realloc */ if (fsm->statecount == fsm->statealloc) { const size_t factor = 2; /* a guess */ @@ -40,10 +45,6 @@ fsm_addstate(struct fsm *fsm, fsm_state_t *state) return 0; } - for (i = fsm->statealloc; i < n; i++) { - tmp[i].has_capture_actions = 0; - } - fsm->statealloc = n; fsm->states = tmp; } @@ -52,16 +53,7 @@ fsm_addstate(struct fsm *fsm, fsm_state_t *state) *state = fsm->statecount; } - { - struct fsm_state *new; - - new = &fsm->states[fsm->statecount]; - - new->end = 0; - new->visited = 0; - new->epsilons = NULL; - new->edges = NULL; - } + init_state(&fsm->states[fsm->statecount]); fsm->statecount++; @@ -77,14 +69,7 @@ fsm_addstate_bulk(struct fsm *fsm, size_t n) if (fsm->statecount + n <= fsm->statealloc) { for (i = 0; i < n; i++) { - struct fsm_state *new; - - new = &fsm->states[fsm->statecount + i]; - - new->end = 0; - new->visited 
= 0; - new->epsilons = NULL; - new->edges = NULL; + init_state(&fsm->states[fsm->statecount + i]); } fsm->statecount += n; diff --git a/src/lx/Makefile b/src/lx/Makefile index 683341ab0..2a3f631f9 100644 --- a/src/lx/Makefile +++ b/src/lx/Makefile @@ -24,7 +24,7 @@ CFLAGS.${src} += -pthread DFLAGS.${src} += -pthread .endfor .endif -LFLAGS.lx += -lpthread +LFLAGS.lx += -lpthread -ldl LEXER += src/lx/lexer.lx PARSER += src/lx/parser.sid diff --git a/src/re/Makefile b/src/re/Makefile index 6563b0b8c..385f04622 100644 --- a/src/re/Makefile +++ b/src/re/Makefile @@ -17,9 +17,12 @@ ${BUILD}/bin/re: ${BUILD}/lib/${lib:R}.a ${BUILD}/bin/re: ${BUILD}/${src:R}.o .endfor +.if ${SYSTEM} == Linux +LFLAGS.re += -lpthread -ldl +.endif + # smoke test for sh codegen -- does not work for bash 3.2.x .if ${SYSTEM} != Darwin test:: ${BUILD}/bin/re echo -n abcd | bash -c "`./build/bin/re -pl sh -k str -br glob 'a?c*d'`" .endif - diff --git a/src/retest/Makefile b/src/retest/Makefile index b56d5b86f..07ef50443 100644 --- a/src/retest/Makefile +++ b/src/retest/Makefile @@ -16,8 +16,9 @@ DFLAGS.${src} += -std=c99 .endfor .if ${SYSTEM} == Linux -LFLAGS.reperf += -ldl -LFLAGS.retest += -ldl +LFLAGS.reperf += -lpthread -ldl +LFLAGS.retest += -lpthread -ldl +LFLAGS.cvtpcre += -lpthread -ldl .endif PROG += retest diff --git a/tests/aho_corasick/Makefile b/tests/aho_corasick/Makefile index 5748ddd5c..ff6ebd1f6 100644 --- a/tests/aho_corasick/Makefile +++ b/tests/aho_corasick/Makefile @@ -19,7 +19,7 @@ ${TEST_OUTDIR.tests/aho_corasick}/actest: ${BUILD}/lib/${lib:R}.a AC_TEST=${TEST_OUTDIR.tests/aho_corasick}/actest ${AC_TEST}: - ${CC} ${CFLAGS} -o ${.TARGET} ${.ALLSRC} + ${CC} ${CFLAGS} -o ${.TARGET} ${.ALLSRC} -lpthread -ldl test:: ${AC_TEST} diff --git a/tests/capture/Makefile b/tests/capture/Makefile index 53d63ff2b..e9a80a9b3 100644 --- a/tests/capture/Makefile +++ b/tests/capture/Makefile @@ -10,7 +10,7 @@ SRC += ${TEST_SRCDIR.tests/capture}/capture${n}.c 
CFLAGS.${TEST_SRCDIR.tests/capture}/capture${n}.c = -UNDEBUG ${TEST_OUTDIR.tests/capture}/run${n}: ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o - ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/capture}/run${n} ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ${BUILD}/lib/libfsm.a + ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/capture}/run${n} ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ${BUILD}/lib/libfsm.a -lpthread -ldl ${TEST_OUTDIR.tests/capture}/res${n}: ${TEST_OUTDIR.tests/capture}/run${n} ( ${TEST_OUTDIR.tests/capture}/run${n} 1>&2 && echo PASS || echo FAIL ) > ${TEST_OUTDIR.tests/capture}/res${n} diff --git a/tests/queue/Makefile b/tests/queue/Makefile index a675667ee..22cb31def 100644 --- a/tests/queue/Makefile +++ b/tests/queue/Makefile @@ -9,7 +9,7 @@ test:: ${TEST_OUTDIR.tests/queue}/res${n} SRC += ${TEST_SRCDIR.tests/queue}/queue${n}.c CFLAGS.${TEST_SRCDIR.tests/queue}/queue${n}.c = -UNDEBUG ${TEST_OUTDIR.tests/queue}/run${n}: ${TEST_OUTDIR.tests/queue}/queue${n}.o ${BUILD}/lib/adt.o - ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/queue}/run${n} ${TEST_OUTDIR.tests/queue}/queue${n}.o ${BUILD}/lib/adt.o + ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/queue}/run${n} ${TEST_OUTDIR.tests/queue}/queue${n}.o ${BUILD}/lib/adt.o -lpthread -ldl ${TEST_OUTDIR.tests/queue}/res${n}: ${TEST_OUTDIR.tests/queue}/run${n} ( ${TEST_OUTDIR.tests/queue}/run${n} 1>&2 && echo PASS || echo FAIL ) > ${TEST_OUTDIR.tests/queue}/res${n} .endfor