Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Datasets locking/v7 #12366

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions rust/cbindgen.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ include = [
"FtpEvent",
"SCSigTableElmt",
"SCTransformTableElmt",
"DataRepType",
]

# A list of items to not include in the generated bindings
Expand Down
16 changes: 16 additions & 0 deletions rust/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,16 @@ pub mod nom7 {
}
}

#[macro_export]
macro_rules!unwrap_or_return {
($e:expr, $r:expr) => {
match $e {
Ok(x) => x,
Err(_) => return $r,
}
}
}

#[cfg(not(feature = "debug-validate"))]
#[macro_export]
macro_rules! debug_validate_bug_on (
Expand Down Expand Up @@ -169,3 +179,9 @@ pub unsafe extern "C" fn rs_to_hex_sep(
// overwrites last separator with final null char
oslice[3 * islice.len() - 1] = 0;
}

// Defined in util-debug.c
/// cbindgen:ignore
extern {
pub fn CallbackFatalErrorOnInit(arg: *const c_char);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jasonish should we create a 'public' SCFatalErrorOnInit instead?

Also, would like it to behave like FatalError and SCLog... to take a format string.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably, should be a macro in Rust like the other log ones.

Much easier to do if FatalErrorOnOnit can become a C function though, rather than a macro, otherwise it'll need a C wrapper function first.

}
152 changes: 152 additions & 0 deletions rust/src/detect/datasets.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/* Copyright (C) 2025 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/

// Author: Shivani Bhardwaj <[email protected]>

//! This module exposes items from the datasets C code to Rust.

use crate::common::CallbackFatalErrorOnInit;
use base64::{self, Engine};
use std::ffi::{c_char, CStr, CString};
use std::fs::{File, OpenOptions};
use std::io::{self, BufRead};
use std::path::Path;

/// Opaque Dataset type defined in C
#[derive(Copy, Clone)]
pub enum Dataset {}

// Simple C type converted to Rust
#[derive(Debug, PartialEq)]
#[repr(C)]
pub struct DataRepType {
pub value: u16,
}

// Extern fns operating on the opaque Dataset type above
/// cbindgen:ignore
extern "C" {
pub fn DatasetAdd(set: &Dataset, data: *const u8, len: u32) -> i32;
pub fn DatasetAddwRep(set: &Dataset, data: *const u8, len: u32, rep: *const DataRepType)
-> i32;
}

#[no_mangle]
pub unsafe extern "C" fn ProcessStringDatasets(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we handle all other types as well in rust to fully address the issue?

set: &Dataset, name: *const c_char, fname: *const c_char, fmode: *const c_char,
) -> i32 {
let file_string = unwrap_or_return!(CStr::from_ptr(fname).to_str(), -2);
let mode = unwrap_or_return!(CStr::from_ptr(fmode).to_str(), -2);
let set_name = unwrap_or_return!(CStr::from_ptr(name).to_str(), -2);
let filename = Path::new(file_string);
let mut is_dataset = false;
let mut is_datarep = false;
if let Ok(lines) = read_lines(filename, mode) {
for line in lines.map_while(Result::ok) {
let v: Vec<&str> = line.split(',').collect();
// Ignore empty and invalid lines in dataset/rep file
if v.is_empty() || v.len() > 2 {
continue;
}

if v.len() == 1 {
if is_datarep {
SCLogError!(
"Cannot mix dataset and datarep values for set {} in {}",
set_name,
filename.display()
);
return -2;
}
// Dataset
is_dataset = true;
let mut decoded: Vec<u8> = vec![];
if base64::engine::general_purpose::STANDARD
.decode_vec(v[0], &mut decoded)
.is_err()
{
let msg = CString::new(format!(
"bad base64 encoding {} in {}",
set_name,
filename.display()
))
.unwrap();
CallbackFatalErrorOnInit(msg.as_ptr());
continue;
}
DatasetAdd(set, decoded.as_ptr(), decoded.len() as u32);
} else {
if is_dataset {
SCLogError!(
"Cannot mix dataset and datarep values for set {} in {}",
set_name,
filename.display()
);
return -2;
}
// Datarep
is_datarep = true;
let mut decoded: Vec<u8> = vec![];
if base64::engine::general_purpose::STANDARD
.decode_vec(v[0], &mut decoded)
.is_err()
{
let msg = CString::new(format!(
"bad base64 encoding {} in {}",
set_name,
filename.display()
))
.unwrap();
CallbackFatalErrorOnInit(msg.as_ptr());
continue;
}
if let Ok(val) = v[1].to_string().parse::<u16>() {
let rep: DataRepType = DataRepType { value: val };
DatasetAddwRep(set, decoded.as_ptr(), decoded.len() as u32, &rep);
} else {
let msg = CString::new(format!(
"invalid datarep value {} in {}",
set_name,
filename.display()
))
.unwrap();
CallbackFatalErrorOnInit(msg.as_ptr());
continue;
}
}
}
} else {
return -1;
}
0
}

fn read_lines<P>(filename: P, fmode: &str) -> io::Result<io::Lines<io::BufReader<File>>>
where
P: AsRef<Path>,
{
let file: File = if fmode == "r" {
File::open(filename)?
} else {
OpenOptions::new()
.append(true)
.create(true)
.read(true)
.open(filename)?
};
Ok(io::BufReader::new(file).lines())
}
1 change: 1 addition & 0 deletions rust/src/detect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ pub mod transforms;
pub mod uint;
pub mod uri;
pub mod tojson;
pub mod datasets;

use crate::core::AppProto;
use std::os::raw::{c_int, c_void};
Expand Down
4 changes: 1 addition & 3 deletions src/datasets-reputation.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@
#ifndef SURICATA_DATASETS_REPUTATION_H
#define SURICATA_DATASETS_REPUTATION_H

typedef struct DataRepType {
uint16_t value;
} DataRepType;
#include "rust-bindings.h"

typedef struct DataRepResultType {
bool found;
Expand Down
75 changes: 7 additions & 68 deletions src/datasets.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ SCMutex sets_lock = SCMUTEX_INITIALIZER;
static Dataset *sets = NULL;
static uint32_t set_ids = 0;

static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
DataRepType *rep);
int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep);

static inline void DatasetUnlockData(THashData *d)
{
Expand Down Expand Up @@ -496,80 +495,21 @@ static int DatasetLoadString(Dataset *set)
return 0;

SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

const char *fopen_mode = "r";
if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
fopen_mode = "a+";
}

FILE *fp = fopen(set->load, fopen_mode);
if (fp == NULL) {
SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
int retval = ProcessStringDatasets(set, set->name, set->load, fopen_mode);
if (retval == -2) {
FatalErrorOnInit("dataset %s could not be processed", set->name);
} else if (retval == -1) {
return -1;
}

uint32_t cnt = 0;
char line[1024];
while (fgets(line, (int)sizeof(line), fp) != NULL) {
if (strlen(line) <= 1)
continue;

char *r = strchr(line, ',');
if (r == NULL) {
line[strlen(line) - 1] = '\0';
SCLogDebug("line: '%s'", line);
uint32_t decoded_size = Base64DecodeBufferSize(strlen(line));
// coverity[alloc_strlen : FALSE]
uint8_t decoded[decoded_size];
uint32_t num_decoded =
Base64Decode((const uint8_t *)line, strlen(line), Base64ModeStrict, decoded);
if (num_decoded == 0 && strlen(line) > 0) {
FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load);
continue;
}

if (DatasetAdd(set, (const uint8_t *)decoded, num_decoded) < 0) {
FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
continue;
}
cnt++;
} else {
line[strlen(line) - 1] = '\0';
SCLogDebug("line: '%s'", line);

*r = '\0';

uint32_t decoded_size = Base64DecodeBufferSize(strlen(line));
uint8_t decoded[decoded_size];
uint32_t num_decoded =
Base64Decode((const uint8_t *)line, strlen(line), Base64ModeStrict, decoded);
if (num_decoded == 0) {
FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load);
continue;
}

r++;
SCLogDebug("r '%s'", r);

DataRepType rep = { .value = 0 };
if (ParseRepLine(r, strlen(r), &rep) < 0) {
FatalErrorOnInit("die: bad rep");
continue;
}
SCLogDebug("rep %u", rep.value);

if (DatasetAddwRep(set, (const uint8_t *)decoded, num_decoded, &rep) < 0) {
FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
continue;
}
cnt++;

SCLogDebug("line with rep %s, %s", line, r);
}
}
THashConsolidateMemcap(set->hash);

fclose(fp);
SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
return 0;
}

Expand Down Expand Up @@ -1572,8 +1512,7 @@ int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
return -1;
}

static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
DataRepType *rep)
int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep)
{
if (set == NULL)
return -1;
Expand Down
1 change: 1 addition & 0 deletions src/datasets.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#define SURICATA_DATASETS_H

#include "util-thash.h"
#include "rust.h"
#include "datasets-reputation.h"

int DatasetsInit(void);
Expand Down
2 changes: 0 additions & 2 deletions src/detect-dataset.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,6 @@
#define DETECT_DATASET_CMD_ISNOTSET 2
#define DETECT_DATASET_CMD_ISSET 3

int DetectDatasetMatch (ThreadVars *, DetectEngineThreadCtx *, Packet *,
const Signature *, const SigMatchCtx *);
static int DetectDatasetSetup (DetectEngineCtx *, Signature *, const char *);
void DetectDatasetFree (DetectEngineCtx *, void *);

Expand Down
5 changes: 5 additions & 0 deletions src/util-debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -1614,6 +1614,11 @@ void SCLogDeInitLogModule(void)
#endif /* OS_WIN32 */
}

void CallbackFatalErrorOnInit(const char *arg)
{
FatalErrorOnInit("%s", arg);
}

//------------------------------------Unit_Tests--------------------------------

/* The logging engine should be tested to the maximum extent possible, since
Expand Down
2 changes: 2 additions & 0 deletions src/util-debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,8 @@ SCLogInitData *SCLogAllocLogInitData(void);

void SCLogAppendOPIfaceCtx(SCLogOPIfaceCtx *, SCLogInitData *);

void CallbackFatalErrorOnInit(const char *);

void SCLogInitLogModule(SCLogInitData *);

void SCLogDeInitLogModule(void);
Expand Down
Loading