From 8d24c4fdb9d453025c5acb84409ae5b74c56bd65 Mon Sep 17 00:00:00 2001 From: Kate F Date: Mon, 19 Apr 2021 17:17:04 -0700 Subject: [PATCH 01/21] Initial import for Cargo. --- .gitignore | 4 +++- Cargo.toml | 13 +++++++++++++ Makefile | 15 +++++++++++++++ src/libfsm/fsm.c | 4 ++++ src/libfsm/lib.rs | 12 ++++++++++++ src/libfsm/libfsm.syms | 4 ++++ src/libfsm/libfsm_rs.h | 6 ++++++ 7 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 Cargo.toml create mode 100644 src/libfsm/lib.rs create mode 100644 src/libfsm/libfsm_rs.h diff --git a/.gitignore b/.gitignore index 567609b12..65d6036f6 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -build/ +/build +/target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 000000000..f443424c4 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "libfsm-rs" +version = "0.1.0" +authors = ["Kate F "] +edition = "2018" + +[lib] +path = "src/libfsm/lib.rs" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] + diff --git a/Makefile b/Makefile index d5c14b3cc..053fb0363 100644 --- a/Makefile +++ b/Makefile @@ -110,3 +110,18 @@ STAGE_BUILD := ${STAGE_BUILD:Nbin/cvtpcre} grep FAIL ${BUILD}/tests/*/res*; [ $$? 
-ne 0 ] .endif +.if !${CC:T:Memcc*} + +.for prog in ${PROG} +${BUILD}/bin/${prog}: ./target/debug/liblibfsm_rs.a +.endfor + +# naming for kmkf prog.mk +./target/debug/liblibfsm_rs.a: ./target/debug/liblibfsm_rs.rlib + ln -sf ${.ALLSRC:T} ${.TARGET} + +./target/debug/liblibfsm_rs.rlib: + cargo build + +.endif + diff --git a/src/libfsm/fsm.c b/src/libfsm/fsm.c index 60f748184..99848c603 100644 --- a/src/libfsm/fsm.c +++ b/src/libfsm/fsm.c @@ -22,6 +22,7 @@ #include "internal.h" #include "capture.h" #include "endids.h" +#include "libfsm_rs.h" void free_contents(struct fsm *fsm) @@ -47,6 +48,9 @@ fsm_new(const struct fsm_options *opt) static const struct fsm_options defaults; struct fsm *new, f; + /* just to prove linking works */ + fsm_noop(); + if (opt == NULL) { opt = &defaults; } diff --git a/src/libfsm/lib.rs b/src/libfsm/lib.rs new file mode 100644 index 000000000..def4ba84e --- /dev/null +++ b/src/libfsm/lib.rs @@ -0,0 +1,12 @@ +// just here to prove linking works +#[no_mangle] +pub extern "C" fn fsm_noop() { +} + +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + assert_eq!(2 + 2, 4); + } +} diff --git a/src/libfsm/libfsm.syms b/src/libfsm/libfsm.syms index 947012fd0..667aac301 100644 --- a/src/libfsm/libfsm.syms +++ b/src/libfsm/libfsm.syms @@ -126,3 +126,7 @@ fsm_capture_set_path fsm_capture_rebase_capture_id fsm_capture_alloc fsm_capture_dump + +# from rust +fsm_noop + diff --git a/src/libfsm/libfsm_rs.h b/src/libfsm/libfsm_rs.h new file mode 100644 index 000000000..d81f33857 --- /dev/null +++ b/src/libfsm/libfsm_rs.h @@ -0,0 +1,6 @@ +#ifndef LIBFSM_RS_H +#define LIBFSM_RS_H + +void fsm_noop(void); + +#endif From a643036c6bb26bfbc82f024934589c4fd1c6868f Mon Sep 17 00:00:00 2001 From: Kate F Date: Mon, 19 Apr 2021 17:33:39 -0700 Subject: [PATCH 02/21] Special cases for tests with their own build stuff. 
--- tests/aho_corasick/Makefile | 4 ++++ tests/capture/Makefile | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/tests/aho_corasick/Makefile b/tests/aho_corasick/Makefile index 5748ddd5c..89524ed3b 100644 --- a/tests/aho_corasick/Makefile +++ b/tests/aho_corasick/Makefile @@ -16,6 +16,10 @@ ${TEST_OUTDIR.tests/aho_corasick}/actest: ${TEST_OUTDIR.tests/aho_corasick}/acte ${TEST_OUTDIR.tests/aho_corasick}/actest: ${BUILD}/lib/${lib:R}.a .endfor +.if !${CC:T:Memcc*} +${TEST_OUTDIR.tests/aho_corasick}/actest: ./target/debug/liblibfsm_rs.a +.endif + AC_TEST=${TEST_OUTDIR.tests/aho_corasick}/actest ${AC_TEST}: diff --git a/tests/capture/Makefile b/tests/capture/Makefile index 53d63ff2b..9117ebb2c 100644 --- a/tests/capture/Makefile +++ b/tests/capture/Makefile @@ -9,8 +9,13 @@ test:: ${TEST_OUTDIR.tests/capture}/res${n} SRC += ${TEST_SRCDIR.tests/capture}/capture${n}.c CFLAGS.${TEST_SRCDIR.tests/capture}/capture${n}.c = -UNDEBUG +.if ${CC:T:Memcc*} ${TEST_OUTDIR.tests/capture}/run${n}: ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/capture}/run${n} ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ${BUILD}/lib/libfsm.a +.else +${TEST_OUTDIR.tests/capture}/run${n}: ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ./target/debug/liblibfsm_rs.rlib + ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/capture}/run${n} ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ${BUILD}/lib/libfsm.a ./target/debug/liblibfsm_rs.a +.endif ${TEST_OUTDIR.tests/capture}/res${n}: ${TEST_OUTDIR.tests/capture}/run${n} ( ${TEST_OUTDIR.tests/capture}/run${n} 1>&2 && echo PASS || echo FAIL ) > ${TEST_OUTDIR.tests/capture}/res${n} From 00c33dfb2a88ee061fc7dc8ae65f4e11d847dd10 Mon Sep 17 00:00:00 2001 From: Kate F Date: Mon, 19 Apr 2021 17:53:56 -0700 Subject: [PATCH 03/21] An attempt to bring in Cargo-generated dependencies. 
--- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 053fb0363..4025c0c4f 100644 --- a/Makefile +++ b/Makefile @@ -123,5 +123,10 @@ ${BUILD}/bin/${prog}: ./target/debug/liblibfsm_rs.a ./target/debug/liblibfsm_rs.rlib: cargo build +./target/debug/liblibfsm_rs.d: ./target/debug/liblibfsm_rs.rlib +.if exists(./target/debug/liblibfsm_rs.d) +.include "./target/debug/liblibfsm_rs.d" +.endif + .endif From 977208935f45e0fed00e4bd0aedf340b547f3b62 Mon Sep 17 00:00:00 2001 From: Kate F Date: Mon, 19 Apr 2021 17:54:17 -0700 Subject: [PATCH 04/21] Run cargo test. --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 4025c0c4f..2f5cfcb72 100644 --- a/Makefile +++ b/Makefile @@ -123,6 +123,9 @@ ${BUILD}/bin/${prog}: ./target/debug/liblibfsm_rs.a ./target/debug/liblibfsm_rs.rlib: cargo build +test:: + cargo test + ./target/debug/liblibfsm_rs.d: ./target/debug/liblibfsm_rs.rlib .if exists(./target/debug/liblibfsm_rs.d) .include "./target/debug/liblibfsm_rs.d" From 6e05435ba6fd21d3cd53919731e9a49c425bcf3b Mon Sep 17 00:00:00 2001 From: Kate F Date: Mon, 19 Apr 2021 17:58:59 -0700 Subject: [PATCH 05/21] Switch to `staticlib` rather than symlinking for an archive. 
--- Cargo.toml | 1 + Makefile | 8 ++------ tests/capture/Makefile | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f443424c4..48f6a50a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ authors = ["Kate F "] edition = "2018" [lib] +crate-type = ["staticlib"] path = "src/libfsm/lib.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/Makefile b/Makefile index 2f5cfcb72..b1bbc3e5b 100644 --- a/Makefile +++ b/Makefile @@ -116,17 +116,13 @@ STAGE_BUILD := ${STAGE_BUILD:Nbin/cvtpcre} ${BUILD}/bin/${prog}: ./target/debug/liblibfsm_rs.a .endfor -# naming for kmkf prog.mk -./target/debug/liblibfsm_rs.a: ./target/debug/liblibfsm_rs.rlib - ln -sf ${.ALLSRC:T} ${.TARGET} - -./target/debug/liblibfsm_rs.rlib: +./target/debug/liblibfsm_rs.a: cargo build test:: cargo test -./target/debug/liblibfsm_rs.d: ./target/debug/liblibfsm_rs.rlib +./target/debug/liblibfsm_rs.d: ./target/debug/liblibfsm_rs.a .if exists(./target/debug/liblibfsm_rs.d) .include "./target/debug/liblibfsm_rs.d" .endif diff --git a/tests/capture/Makefile b/tests/capture/Makefile index 9117ebb2c..bf8734df7 100644 --- a/tests/capture/Makefile +++ b/tests/capture/Makefile @@ -13,7 +13,7 @@ CFLAGS.${TEST_SRCDIR.tests/capture}/capture${n}.c = -UNDEBUG ${TEST_OUTDIR.tests/capture}/run${n}: ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/capture}/run${n} ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ${BUILD}/lib/libfsm.a .else -${TEST_OUTDIR.tests/capture}/run${n}: ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ./target/debug/liblibfsm_rs.rlib +${TEST_OUTDIR.tests/capture}/run${n}: ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ./target/debug/liblibfsm_rs.a ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/capture}/run${n} 
${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ${BUILD}/lib/libfsm.a ./target/debug/liblibfsm_rs.a .endif From 34e8b493f4f01999759e06457c033ece8bd7b06d Mon Sep 17 00:00:00 2001 From: Kate F Date: Mon, 19 Apr 2021 21:07:51 -0700 Subject: [PATCH 06/21] Remove support for building shared libraries. This is a shame, but gets in the way of porting to Rust. --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index b1bbc3e5b..251e95288 100644 --- a/Makefile +++ b/Makefile @@ -92,7 +92,6 @@ INCDIR += include .include .include .include -.include .include .include .include From b572d09632199cf9b0935c43a4de9214396ec7c4 Mon Sep 17 00:00:00 2001 From: Kate F Date: Mon, 19 Apr 2021 21:44:22 -0700 Subject: [PATCH 07/21] Add `-undefined dynamic_lookup` for symbols provided by Cargo-built libraries. --- Makefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Makefile b/Makefile index 251e95288..b8920ad5b 100644 --- a/Makefile +++ b/Makefile @@ -126,5 +126,12 @@ test:: .include "./target/debug/liblibfsm_rs.d" .endif +# for symbols provided by libraries built by cargo instead +.if ${SYSTEM} == Darwin +.for part in ${PART} +LDRFLAGS.${part} += -undefined dynamic_lookup +.endfor +.endif + .endif From bb83fd4a68fe1bf889068bc553aa3e422a1afc71 Mon Sep 17 00:00:00 2001 From: Kate F Date: Tue, 20 Apr 2021 02:05:34 -0700 Subject: [PATCH 08/21] Switch to ld -r linking the cargo-generated .a archive when producing the partially linked .o files. This hopefully avoids an ld bug where `-undefined dynamic_lookup` doesn't work on MacOS, and also removes the need for programs to link both the C and Rust libraries directly. 
--- Makefile | 11 +++-------- tests/aho_corasick/Makefile | 4 ---- tests/capture/Makefile | 5 ----- 3 files changed, 3 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index b8920ad5b..9982082e6 100644 --- a/Makefile +++ b/Makefile @@ -111,10 +111,6 @@ STAGE_BUILD := ${STAGE_BUILD:Nbin/cvtpcre} .if !${CC:T:Memcc*} -.for prog in ${PROG} -${BUILD}/bin/${prog}: ./target/debug/liblibfsm_rs.a -.endfor - ./target/debug/liblibfsm_rs.a: cargo build @@ -126,12 +122,11 @@ test:: .include "./target/debug/liblibfsm_rs.d" .endif -# for symbols provided by libraries built by cargo instead -.if ${SYSTEM} == Darwin .for part in ${PART} -LDRFLAGS.${part} += -undefined dynamic_lookup +${BUILD}/lib/${part}.o: ./target/debug/liblibfsm_rs.a +# hijacking LDRFLAGS here because the target only expects .o sources +LDRFLAGS.${part} += ./target/debug/liblibfsm_rs.a .endfor -.endif .endif diff --git a/tests/aho_corasick/Makefile b/tests/aho_corasick/Makefile index 89524ed3b..5748ddd5c 100644 --- a/tests/aho_corasick/Makefile +++ b/tests/aho_corasick/Makefile @@ -16,10 +16,6 @@ ${TEST_OUTDIR.tests/aho_corasick}/actest: ${TEST_OUTDIR.tests/aho_corasick}/acte ${TEST_OUTDIR.tests/aho_corasick}/actest: ${BUILD}/lib/${lib:R}.a .endfor -.if !${CC:T:Memcc*} -${TEST_OUTDIR.tests/aho_corasick}/actest: ./target/debug/liblibfsm_rs.a -.endif - AC_TEST=${TEST_OUTDIR.tests/aho_corasick}/actest ${AC_TEST}: diff --git a/tests/capture/Makefile b/tests/capture/Makefile index bf8734df7..53d63ff2b 100644 --- a/tests/capture/Makefile +++ b/tests/capture/Makefile @@ -9,13 +9,8 @@ test:: ${TEST_OUTDIR.tests/capture}/res${n} SRC += ${TEST_SRCDIR.tests/capture}/capture${n}.c CFLAGS.${TEST_SRCDIR.tests/capture}/capture${n}.c = -UNDEBUG -.if ${CC:T:Memcc*} ${TEST_OUTDIR.tests/capture}/run${n}: ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/capture}/run${n} ${TEST_OUTDIR.tests/capture}/capture${n}.o 
${TEST_OUTDIR.tests/capture}/captest.o ${BUILD}/lib/libfsm.a -.else -${TEST_OUTDIR.tests/capture}/run${n}: ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ./target/debug/liblibfsm_rs.a - ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/capture}/run${n} ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ${BUILD}/lib/libfsm.a ./target/debug/liblibfsm_rs.a -.endif ${TEST_OUTDIR.tests/capture}/res${n}: ${TEST_OUTDIR.tests/capture}/run${n} ( ${TEST_OUTDIR.tests/capture}/run${n} 1>&2 && echo PASS || echo FAIL ) > ${TEST_OUTDIR.tests/capture}/res${n} From d0be59ae3e74fe68f8835910194f886ee33c79f1 Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Tue, 20 Apr 2021 13:59:12 -0500 Subject: [PATCH 09/21] Replace bitfields in struct fsm_state and struct fsm with bool C bitfields in Rust are painful, and I'd like something that has the same ABI as Rust's bool. stdbool.h's bool is just the thing. In both struct fsm_state and struct fsm: * All the bitfields are packed together. * The field that follows the bitfields is a pointer, so it has pointer alignment, so the overall size of the structs shouldn't change. --- src/libfsm/internal.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/libfsm/internal.h b/src/libfsm/internal.h index a94f11141..fb9ce23e2 100644 --- a/src/libfsm/internal.h +++ b/src/libfsm/internal.h @@ -9,6 +9,7 @@ #include #include +#include <stdbool.h> #include #include @@ -53,14 +54,14 @@ struct fsm_edge { }; struct fsm_state { - unsigned int end:1; + bool end; /* If 0, then this state has no need for checking * the fsm->capture_info struct. 
*/ - unsigned int has_capture_actions:1; + bool has_capture_actions; /* meaningful within one particular transformation only */ - unsigned int visited:1; + bool visited; struct edge_set *edges; struct state_set *epsilons; @@ -74,7 +75,7 @@ struct fsm { size_t endcount; fsm_state_t start; - unsigned int hasstart:1; + bool hasstart; struct fsm_capture_info *capture_info; struct endid_info *endid_info; From 8fb687b3d4ab10137e11aa26c15f86be527cd51d Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Tue, 20 Apr 2021 14:28:43 -0500 Subject: [PATCH 10/21] Start sketching types for State and Fsm These come from internal.h. For now the Rust types have the same ABI as the C types; we'll have parallel structs for a while until all the internals get rustified. --- Cargo.toml | 2 +- src/libfsm/fsm.rs | 60 +++++++++++++++++++++++++++++++++++++++++++ src/libfsm/internal.h | 2 ++ src/libfsm/lib.rs | 2 ++ 4 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 src/libfsm/fsm.rs diff --git a/Cargo.toml b/Cargo.toml index 48f6a50a1..adadce09b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,4 +11,4 @@ path = "src/libfsm/lib.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] - +libc = "0.2" diff --git a/src/libfsm/fsm.rs b/src/libfsm/fsm.rs new file mode 100644 index 000000000..d50d98e3d --- /dev/null +++ b/src/libfsm/fsm.rs @@ -0,0 +1,60 @@ +//! Representation of a Finite State Machine. + +use libc::c_void; + +// Keep in sync with include/fsm.h:fsm_state_t +type StateId = u32; + +// Opaque pointer to struct edget_set +type EdgeSet = *mut c_void; + +// Opaque pointer to struct state_set +type StateSet = *mut c_void; + +// Opaque pointer to struct fsm_capture_info +type CaptureInfo = *mut c_void; + +// Opaque pointer to struct endid_info +type EndIdInfo = *mut c_void; + +// Opaque pointer to struct fsm_options +type Options = *const c_void; + +/// One state in a `Fsm`'s array of states. 
+// Keep in sync with interanl.h:struct fsm_state +#[repr(C)] +struct State { + end: bool, + + /// If false, then this state has no need for checking the fsm->capture_info struct. + has_capture_actions: bool, + + /// Meaningful within one particular transformation only. + visited: bool, + + edges: EdgeSet, + epsilons: StateSet, +} + +/// Finite State Machine. +// Keep in sync with interanl.h:struct fsm +#[repr(C)] +struct Fsm { + /// Array. + states: *mut State, + + /// Number of elements allocated. + statealloc: usize, + + /// Number of elements populated. + statecount: usize, + + endcount: usize, + + start: StateId, + hasstart: bool, + + capture_info: CaptureInfo, + endid_info: EndIdInfo, + opt: Options, +} diff --git a/src/libfsm/internal.h b/src/libfsm/internal.h index fb9ce23e2..69a1560ff 100644 --- a/src/libfsm/internal.h +++ b/src/libfsm/internal.h @@ -53,6 +53,7 @@ struct fsm_edge { unsigned char symbol; }; +/* Keep in sync with fsm.rs:State */ struct fsm_state { bool end; @@ -67,6 +68,7 @@ struct fsm_state { struct state_set *epsilons; }; +/* Keep in sync with fsm.rs:Fsm */ struct fsm { struct fsm_state *states; /* array */ diff --git a/src/libfsm/lib.rs b/src/libfsm/lib.rs index def4ba84e..428facdcd 100644 --- a/src/libfsm/lib.rs +++ b/src/libfsm/lib.rs @@ -1,3 +1,5 @@ +mod fsm; + // just here to prove linking works #[no_mangle] pub extern "C" fn fsm_noop() { From b4a98c22f3fc770350af78cb402599b5a70cebbd Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Tue, 20 Apr 2021 16:16:52 -0500 Subject: [PATCH 11/21] Move all of start.c to Rust The strategy for this commit: * Implement clear_start / set_start / get_start in Rust as methods, with argument checks as assertions. * Implement a C ABI wrapper to match the fsm_clearstart() / etc. functions. The non-null argument checks go there. * Test both sets of functions right there. 
We cannot construct a Fsm in Rust just yet, so the tests hack up just enough initial state for the code to run; this can be removed later. * Remove start.c. Later we can move all of the C API to a c_api.rs or whatever. --- src/libfsm/Makefile | 1 - src/libfsm/fsm.rs | 113 +++++++++++++++++++++++++++++++++++++++++++- src/libfsm/start.c | 49 ------------------- 3 files changed, 112 insertions(+), 51 deletions(-) delete mode 100644 src/libfsm/start.c diff --git a/src/libfsm/Makefile b/src/libfsm/Makefile index c84f504bd..bce7c1791 100644 --- a/src/libfsm/Makefile +++ b/src/libfsm/Makefile @@ -14,7 +14,6 @@ SRC += src/libfsm/equal.c SRC += src/libfsm/exec.c SRC += src/libfsm/fsm.c SRC += src/libfsm/mode.c -SRC += src/libfsm/start.c SRC += src/libfsm/state.c SRC += src/libfsm/trim.c SRC += src/libfsm/example.c diff --git a/src/libfsm/fsm.rs b/src/libfsm/fsm.rs index d50d98e3d..ada4e7c68 100644 --- a/src/libfsm/fsm.rs +++ b/src/libfsm/fsm.rs @@ -39,7 +39,7 @@ struct State { /// Finite State Machine. // Keep in sync with interanl.h:struct fsm #[repr(C)] -struct Fsm { +pub struct Fsm { /// Array. 
states: *mut State, @@ -58,3 +58,114 @@ struct Fsm { endid_info: EndIdInfo, opt: Options, } + +impl Fsm { + pub fn clear_start(&mut self) { + self.hasstart = false; + } + + pub fn set_start(&mut self, state: StateId) { + assert!((state as usize) < self.statecount); + + self.start = state; + self.hasstart = true; + } + + pub fn get_start(&self) -> Option<StateId> { + if self.hasstart { + assert!((self.start as usize) < self.statecount); + Some(self.start) + } else { + None + } + } +} + +#[no_mangle] +pub unsafe fn fsm_clearstart(fsm: *mut Fsm) { + assert!(!fsm.is_null()); + let fsm = &mut *fsm; + + fsm.clear_start(); +} + +#[no_mangle] +pub unsafe fn fsm_setstart(fsm: *mut Fsm, state: StateId) { + assert!(!fsm.is_null()); + let fsm = &mut *fsm; + + fsm.set_start(state); +} + +#[no_mangle] +pub unsafe fn fsm_getstart(fsm: *const Fsm, out_start: *mut StateId) -> i32 { + assert!(!fsm.is_null()); + assert!(!out_start.is_null()); + + let fsm = &*fsm; + match fsm.get_start() { + Some(id) => { + *out_start = id; + 1 + } + + None => 0 + } +} + +#[cfg(test)] +mod tests { + use std::ptr; + use super::*; + + // Just here until we can construct Fsm in Rust + fn dummy_fsm_new() -> Fsm { + Fsm { + states: ptr::null_mut(), + statealloc: 0, + statecount: 0, + endcount: 0, + start: 0, + hasstart: false, + capture_info: ptr::null_mut(), + endid_info: ptr::null_mut(), + opt: ptr::null(), + } + } + + #[test] + fn sets_and_clears_start() { + let mut fsm = dummy_fsm_new(); + + // FIXME: this sets up an inconsistent state, but we need it to + // test the assertion inside fsm.set_start(). + fsm.statecount = 1; + + fsm.set_start(0); + assert_eq!(fsm.get_start(), Some(0)); + + fsm.clear_start(); + assert!(fsm.get_start().is_none()) + } + + #[test] + fn sets_and_clears_start_from_c() { + let mut fsm = dummy_fsm_new(); + let fsm_ptr = &mut fsm as *mut _; + + // FIXME: this sets up an inconsistent state, but we need it to + // test the assertion inside fsm_setstart(). 
+ fsm.statecount = 1; + + unsafe { + let mut n = 0; + + fsm_setstart(fsm_ptr, 0); + assert_eq!(fsm_getstart(fsm_ptr, &mut n), 1); + assert_eq!(n, 0); + + fsm_clearstart(fsm_ptr); + assert_eq!(fsm_getstart(fsm_ptr, &mut n), 0); + } + } +} diff --git a/src/libfsm/start.c b/src/libfsm/start.c deleted file mode 100644 index 98ef1a4db..000000000 --- a/src/libfsm/start.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2008-2017 Katherine Flavel - * - * See LICENCE for the full copyright terms. - */ - -#include -#include - -#include - -#include - -#include "internal.h" - -void -fsm_clearstart(struct fsm *fsm) -{ - assert(fsm != NULL); - - fsm->hasstart = 0; -} - -void -fsm_setstart(struct fsm *fsm, fsm_state_t state) -{ - assert(fsm != NULL); - assert(state < fsm->statecount); - - fsm->start = state; - fsm->hasstart = 1; -} - -int -fsm_getstart(const struct fsm *fsm, fsm_state_t *start) -{ - assert(fsm != NULL); - assert(start != NULL); - - if (!fsm->hasstart) { - return 0; - } - - assert(fsm->start < fsm->statecount); - - *start = fsm->start; - return 1; -} - From 4277bb448f5c8c311d25988c04f8f7bc65399f43 Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Tue, 20 Apr 2021 16:20:24 -0500 Subject: [PATCH 12/21] Remove example test from lib.rs --- src/libfsm/lib.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/libfsm/lib.rs b/src/libfsm/lib.rs index 428facdcd..41030aa6f 100644 --- a/src/libfsm/lib.rs +++ b/src/libfsm/lib.rs @@ -4,11 +4,3 @@ mod fsm; #[no_mangle] pub extern "C" fn fsm_noop() { } - -#[cfg(test)] -mod tests { - #[test] - fn it_works() { - assert_eq!(2 + 2, 4); - } -} From 918798e80d4dd2e19aa3f9e1b8221883484afb40 Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Tue, 20 Apr 2021 16:20:36 -0500 Subject: [PATCH 13/21] Hack in -lpthread -ldl everywhere needed I really don't know the best way to do this with pmake; someone who knows better should fix this :) --- src/fsm/Makefile | 2 ++ src/lx/Makefile | 2 +- src/re/Makefile | 5 
++++- src/retest/Makefile | 5 +++-- tests/aho_corasick/Makefile | 2 +- tests/capture/Makefile | 2 +- tests/queue/Makefile | 2 +- 7 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/fsm/Makefile b/src/fsm/Makefile index 5b5f212fa..3eb6c7af4 100644 --- a/src/fsm/Makefile +++ b/src/fsm/Makefile @@ -61,6 +61,8 @@ LDD_VER!= \ LFLAGS.fsm += -lrt .endif +LFLAGS.fsm += -lpthread -ldl + .endif .endif diff --git a/src/lx/Makefile b/src/lx/Makefile index 683341ab0..2a3f631f9 100644 --- a/src/lx/Makefile +++ b/src/lx/Makefile @@ -24,7 +24,7 @@ CFLAGS.${src} += -pthread DFLAGS.${src} += -pthread .endfor .endif -LFLAGS.lx += -lpthread +LFLAGS.lx += -lpthread -ldl LEXER += src/lx/lexer.lx PARSER += src/lx/parser.sid diff --git a/src/re/Makefile b/src/re/Makefile index 6563b0b8c..385f04622 100644 --- a/src/re/Makefile +++ b/src/re/Makefile @@ -17,9 +17,12 @@ ${BUILD}/bin/re: ${BUILD}/lib/${lib:R}.a ${BUILD}/bin/re: ${BUILD}/${src:R}.o .endfor +.if ${SYSTEM} == Linux +LFLAGS.re += -lpthread -ldl +.endif + # smoke test for sh codegen -- does not work for bash 3.2.x .if ${SYSTEM} != Darwin test:: ${BUILD}/bin/re echo -n abcd | bash -c "`./build/bin/re -pl sh -k str -br glob 'a?c*d'`" .endif - diff --git a/src/retest/Makefile b/src/retest/Makefile index b56d5b86f..07ef50443 100644 --- a/src/retest/Makefile +++ b/src/retest/Makefile @@ -16,8 +16,9 @@ DFLAGS.${src} += -std=c99 .endfor .if ${SYSTEM} == Linux -LFLAGS.reperf += -ldl -LFLAGS.retest += -ldl +LFLAGS.reperf += -lpthread -ldl +LFLAGS.retest += -lpthread -ldl +LFLAGS.cvtpcre += -lpthread -ldl .endif PROG += retest diff --git a/tests/aho_corasick/Makefile b/tests/aho_corasick/Makefile index 5748ddd5c..ff6ebd1f6 100644 --- a/tests/aho_corasick/Makefile +++ b/tests/aho_corasick/Makefile @@ -19,7 +19,7 @@ ${TEST_OUTDIR.tests/aho_corasick}/actest: ${BUILD}/lib/${lib:R}.a AC_TEST=${TEST_OUTDIR.tests/aho_corasick}/actest ${AC_TEST}: - ${CC} ${CFLAGS} -o ${.TARGET} ${.ALLSRC} + ${CC} ${CFLAGS} -o ${.TARGET} ${.ALLSRC} 
-lpthread -ldl test:: ${AC_TEST} diff --git a/tests/capture/Makefile b/tests/capture/Makefile index 53d63ff2b..e9a80a9b3 100644 --- a/tests/capture/Makefile +++ b/tests/capture/Makefile @@ -10,7 +10,7 @@ SRC += ${TEST_SRCDIR.tests/capture}/capture${n}.c CFLAGS.${TEST_SRCDIR.tests/capture}/capture${n}.c = -UNDEBUG ${TEST_OUTDIR.tests/capture}/run${n}: ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o - ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/capture}/run${n} ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ${BUILD}/lib/libfsm.a + ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/capture}/run${n} ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ${BUILD}/lib/libfsm.a -lpthread -ldl ${TEST_OUTDIR.tests/capture}/res${n}: ${TEST_OUTDIR.tests/capture}/run${n} ( ${TEST_OUTDIR.tests/capture}/run${n} 1>&2 && echo PASS || echo FAIL ) > ${TEST_OUTDIR.tests/capture}/res${n} diff --git a/tests/queue/Makefile b/tests/queue/Makefile index a675667ee..22cb31def 100644 --- a/tests/queue/Makefile +++ b/tests/queue/Makefile @@ -9,7 +9,7 @@ test:: ${TEST_OUTDIR.tests/queue}/res${n} SRC += ${TEST_SRCDIR.tests/queue}/queue${n}.c CFLAGS.${TEST_SRCDIR.tests/queue}/queue${n}.c = -UNDEBUG ${TEST_OUTDIR.tests/queue}/run${n}: ${TEST_OUTDIR.tests/queue}/queue${n}.o ${BUILD}/lib/adt.o - ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/queue}/run${n} ${TEST_OUTDIR.tests/queue}/queue${n}.o ${BUILD}/lib/adt.o + ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/queue}/run${n} ${TEST_OUTDIR.tests/queue}/queue${n}.o ${BUILD}/lib/adt.o -lpthread -ldl ${TEST_OUTDIR.tests/queue}/res${n}: ${TEST_OUTDIR.tests/queue}/run${n} ( ${TEST_OUTDIR.tests/queue}/run${n} 1>&2 && echo PASS || echo FAIL ) > ${TEST_OUTDIR.tests/queue}/res${n} .endfor From 84d60c79f78e915c6ac79af919c8634ff3e2097a Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Tue, 20 Apr 2021 18:43:39 -0500 Subject: [PATCH 14/21] Move end.c to Rust Another easy function, wheeee! 
--- src/libfsm/Makefile | 1 - src/libfsm/end.c | 42 -------------------------- src/libfsm/fsm.rs | 72 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 43 deletions(-) delete mode 100644 src/libfsm/end.c diff --git a/src/libfsm/Makefile b/src/libfsm/Makefile index bce7c1791..fefec2ed7 100644 --- a/src/libfsm/Makefile +++ b/src/libfsm/Makefile @@ -8,7 +8,6 @@ SRC += src/libfsm/clone.c SRC += src/libfsm/closure.c SRC += src/libfsm/edge.c SRC += src/libfsm/empty.c -SRC += src/libfsm/end.c SRC += src/libfsm/endids.c SRC += src/libfsm/equal.c SRC += src/libfsm/exec.c diff --git a/src/libfsm/end.c b/src/libfsm/end.c deleted file mode 100644 index 5a4aa0d72..000000000 --- a/src/libfsm/end.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2008-2017 Katherine Flavel - * - * See LICENCE for the full copyright terms. - */ - -#include -#include - -#include -#include - -#include - -#include "internal.h" - -void -fsm_setend(struct fsm *fsm, fsm_state_t state, int end) -{ - (void) fsm; - - assert(fsm != NULL); - assert(state < fsm->statecount); - - if (fsm->states[state].end == !!end) { - return; - } - - switch (end) { - case 0: - assert(fsm->endcount > 0); - fsm->endcount--; - fsm->states[state].end = 0; - break; - - case 1: - assert(fsm->endcount < FSM_ENDCOUNT_MAX); - fsm->endcount++; - fsm->states[state].end = 1; - break; - } -} diff --git a/src/libfsm/fsm.rs b/src/libfsm/fsm.rs index ada4e7c68..cae7d1ddd 100644 --- a/src/libfsm/fsm.rs +++ b/src/libfsm/fsm.rs @@ -1,6 +1,7 @@ //! Representation of a Finite State Machine. use libc::c_void; +use std::slice; // Keep in sync with include/fsm.h:fsm_state_t type StateId = u32; @@ -20,6 +21,8 @@ type EndIdInfo = *mut c_void; // Opaque pointer to struct fsm_options type Options = *const c_void; +const ENDCOUNT_MAX: usize = usize::MAX; + /// One state in a `Fsm`'s array of states. 
// Keep in sync with interanl.h:struct fsm_state #[repr(C)] @@ -79,6 +82,36 @@ impl Fsm { None } } + + fn states_as_mut_slice(&self) -> &mut [State] { + unsafe { slice::from_raw_parts_mut(self.states, self.statecount) } + } + + pub fn set_end(&mut self, state: StateId, end: bool) { + assert!((state as usize) < self.statecount); + + { + // Temporary scope so the `s` mutable borrow terminates before + // we frob `self.endcount` below. + + let states = self.states_as_mut_slice(); + let s = &mut states[state as usize]; + + if s.end == end { + return; + } else { + s.end = end; + } + } + + if end { + assert!(self.endcount < ENDCOUNT_MAX); + self.endcount += 1; + } else { + assert!(self.endcount > 0); + self.endcount -= 1; + } + } } #[no_mangle] @@ -113,6 +146,16 @@ pub unsafe fn fsm_getstart(fsm: *const Fsm, out_start: *mut StateId) -> i32 { } } +#[no_mangle] +pub unsafe fn fsm_setend(fsm: *mut Fsm, state: StateId, end: i32) { + assert!(!fsm.is_null()); + let fsm = &mut *fsm; + + let end = if end != 0 { true } else { false }; + + fsm.set_end(state, end) +} + #[cfg(test)] mod tests { use std::ptr; @@ -168,4 +211,33 @@ mod tests { assert_eq!(fsm_getstart(fsm_ptr, &mut n), 0); } } + + #[test] + fn set_end_works() { + let mut fsm = dummy_fsm_new(); + + fn make_state() -> State { + State { + end: false, + has_capture_actions: false, + visited: false, + edges: ptr::null_mut(), + epsilons: ptr::null_mut(), + } + } + + let mut states = vec![make_state(), make_state(), make_state()]; + + fsm.states = states.as_mut_ptr(); + fsm.statecount = 3; + fsm.statealloc = 3; + + fsm.set_end(1, true); // yay aliased mutability + assert_eq!(fsm.endcount, 1); + assert!(states[1].end); + + fsm.set_end(1, false); + assert_eq!(fsm.endcount, 0); + assert!(!states[1].end); + } } From 701f0fc47b63912d687b0518017f6da4cee920f0 Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Tue, 20 Apr 2021 18:45:12 -0500 Subject: [PATCH 15/21] cargo fmt --- src/libfsm/fsm.rs | 4 ++-- src/libfsm/lib.rs | 3 
+-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/libfsm/fsm.rs b/src/libfsm/fsm.rs index cae7d1ddd..294d47e4a 100644 --- a/src/libfsm/fsm.rs +++ b/src/libfsm/fsm.rs @@ -142,7 +142,7 @@ pub unsafe fn fsm_getstart(fsm: *const Fsm, out_start: *mut StateId) -> i32 { 1 } - None => 0 + None => 0, } } @@ -158,8 +158,8 @@ pub unsafe fn fsm_setend(fsm: *mut Fsm, state: StateId, end: i32) { #[cfg(test)] mod tests { - use std::ptr; use super::*; + use std::ptr; // Just here until we can construct Fsm in Rust fn dummy_fsm_new() -> Fsm { diff --git a/src/libfsm/lib.rs b/src/libfsm/lib.rs index 41030aa6f..e398b586b 100644 --- a/src/libfsm/lib.rs +++ b/src/libfsm/lib.rs @@ -2,5 +2,4 @@ mod fsm; // just here to prove linking works #[no_mangle] -pub extern "C" fn fsm_noop() { -} +pub extern "C" fn fsm_noop() {} From d2aab46c0daeeb593f5054e29d057c919df45cf5 Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Fri, 23 Apr 2021 16:51:26 -0500 Subject: [PATCH 16/21] fsm_addstate(): initialize all fields in the new fsm_state --- src/libfsm/state.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/libfsm/state.c b/src/libfsm/state.c index acf2bff25..dac9217d9 100644 --- a/src/libfsm/state.c +++ b/src/libfsm/state.c @@ -57,10 +57,11 @@ fsm_addstate(struct fsm *fsm, fsm_state_t *state) new = &fsm->states[fsm->statecount]; - new->end = 0; - new->visited = 0; - new->epsilons = NULL; - new->edges = NULL; + new->end = 0; + new->has_capture_actions = false; + new->visited = 0; + new->epsilons = NULL; + new->edges = NULL; } fsm->statecount++; From 036d1eb03f975b93027bcad70b21833bbed780a6 Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Fri, 23 Apr 2021 17:23:29 -0500 Subject: [PATCH 17/21] fsm_addstate_bulk(): Initialize all the fields in the new states --- src/libfsm/state.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/libfsm/state.c b/src/libfsm/state.c index dac9217d9..9b684cc63 100644 --- 
a/src/libfsm/state.c +++ b/src/libfsm/state.c @@ -82,10 +82,11 @@ fsm_addstate_bulk(struct fsm *fsm, size_t n) new = &fsm->states[fsm->statecount + i]; - new->end = 0; - new->visited = 0; - new->epsilons = NULL; - new->edges = NULL; + new->end = 0; + new->has_capture_actions = false; + new->visited = 0; + new->epsilons = NULL; + new->edges = NULL; } fsm->statecount += n; From ce58305f33863496f6751e66f6e02365d4fffe7d Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Fri, 23 Apr 2021 17:26:13 -0500 Subject: [PATCH 18/21] state.c: Extract function to initialize a state Instead of duplicating the code. --- src/libfsm/state.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/src/libfsm/state.c b/src/libfsm/state.c index 9b684cc63..fc74aa815 100644 --- a/src/libfsm/state.c +++ b/src/libfsm/state.c @@ -18,6 +18,16 @@ #include "internal.h" +static void +init_state(struct fsm_state *state) +{ + state->end = 0; + state->has_capture_actions = false; + state->visited = 0; + state->epsilons = NULL; + state->edges = NULL; +} + int fsm_addstate(struct fsm *fsm, fsm_state_t *state) { @@ -52,17 +62,7 @@ fsm_addstate(struct fsm *fsm, fsm_state_t *state) *state = fsm->statecount; } - { - struct fsm_state *new; - - new = &fsm->states[fsm->statecount]; - - new->end = 0; - new->has_capture_actions = false; - new->visited = 0; - new->epsilons = NULL; - new->edges = NULL; - } + init_state(&fsm->states[fsm->statecount]); fsm->statecount++; @@ -78,15 +78,7 @@ fsm_addstate_bulk(struct fsm *fsm, size_t n) if (fsm->statecount + n <= fsm->statealloc) { for (i = 0; i < n; i++) { - struct fsm_state *new; - - new = &fsm->states[fsm->statecount + i]; - - new->end = 0; - new->has_capture_actions = false; - new->visited = 0; - new->epsilons = NULL; - new->edges = NULL; + init_state(&fsm->states[fsm->statecount + i]); } fsm->statecount += n; From 73cdd10fb2dfcc7149c6154f6414c65dc61e8973 Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero 
Date: Fri, 23 Apr 2021 16:57:57 -0500 Subject: [PATCH 19/21] fsm_addstate(): no need to initialize has_capture_actions in the realloc block In all places where a state's .has_capture_actions gets accessed, there is an assert that the index of the state fits within the fsm->statecount. Since in the previous commit we initialize the has_capture_actions field when a new state is added, there is no need to initialize that field for all the as-yet-unused state structs in the realloc block. --- src/libfsm/state.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/libfsm/state.c b/src/libfsm/state.c index fc74aa815..8051d2ffb 100644 --- a/src/libfsm/state.c +++ b/src/libfsm/state.c @@ -50,10 +50,6 @@ fsm_addstate(struct fsm *fsm, fsm_state_t *state) return 0; } - for (i = fsm->statealloc; i < n; i++) { - tmp[i].has_capture_actions = 0; - } - fsm->statealloc = n; fsm->states = tmp; } From 8a5468cab41a3d369be765ca19999c562871e256 Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Fri, 23 Apr 2021 17:08:55 -0500 Subject: [PATCH 20/21] fsm_addstate(): Remove non-working test for statecount == -1 The -1 was meant to be an OOM sentinel, but there is no place in the code that actually sets fsm->statecount to -1 when the states array cannot be (re)allocated. The code after the patch does the realloc(), and properly returns a failure code if realloc() fails. I don't think we need to preserve setting "errno = ENOMEM" since there is no other place in the code that signals OOM through errno. 
--- src/libfsm/state.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/libfsm/state.c b/src/libfsm/state.c index 8051d2ffb..24ab10619 100644 --- a/src/libfsm/state.c +++ b/src/libfsm/state.c @@ -33,11 +33,6 @@ fsm_addstate(struct fsm *fsm, fsm_state_t *state) { assert(fsm != NULL); - if (fsm->statecount == (fsm_state_t) -1) { - errno = ENOMEM; - return 0; - } - /* TODO: something better than one contigious realloc */ if (fsm->statecount == fsm->statealloc) { const size_t factor = 2; /* a guess */ From 840b84753a28b06ca9fdaa4732fb037c1a8fbf3c Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Fri, 23 Apr 2021 17:36:33 -0500 Subject: [PATCH 21/21] fsm_capture_init(): Initialize up to fsm->statecount fields, not the whole statealloc Only the first statecount state structs are valid, anyway. --- src/libfsm/capture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libfsm/capture.c b/src/libfsm/capture.c index c6ae38c08..726202a46 100644 --- a/src/libfsm/capture.c +++ b/src/libfsm/capture.c @@ -21,7 +21,7 @@ fsm_capture_init(struct fsm *fsm) } fsm->capture_info = ci; - for (i = 0; i < fsm->statealloc; i++) { + for (i = 0; i < fsm->statecount; i++) { fsm->states[i].has_capture_actions = 0; }