Skip to content

Commit

Permalink
feat: added binary analysis plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
j-lanson committed Oct 25, 2024
1 parent 2e4302e commit 0c4105c
Show file tree
Hide file tree
Showing 10 changed files with 826 additions and 10 deletions.
33 changes: 25 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ members = [
"plugins/fuzz",
"plugins/entropy",
"plugins/linguist",
"plugins/review"
"plugins/review",
"plugins/binary"
]

# Make sure Hipcheck is run with `cargo run`.
Expand Down
3 changes: 2 additions & 1 deletion config/Hipcheck.kdl
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ analyze {

category "practices" {
analysis "mitre/activity" policy="(lte $ 52)" weight=3
analysis "mitre/binary" policy="(eq 0 (count $))" {
analysis "mitre/binary" {
binary-file "./config/Binary.toml"
binary-file-threshold "0"
}
analysis "mitre/fuzz" policy="(eq #t $)"
analysis "mitre/review" policy="(lte $ 0.05)"
Expand Down
22 changes: 22 additions & 0 deletions plugins/binary/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[package]
name = "binary"
version = "0.1.0"
license = "Apache-2.0"
edition = "2021"
publish = false

[dependencies]
clap = { version = "4.5.20", features = ["derive"] }
content_inspector = "0.2.4"
hipcheck-sdk = { version = "0.1.0", path = "../../sdk/rust", features = ["macros"] }
log = "0.4.22"
pathbuf = "1.0.0"
schemars = "0.8.21"
serde = "1.0.213"
serde_json = "1.0.132"
tokio = { version = "1.41.0", features = ["rt"] }
toml = "0.8.19"
walkdir = "2.5.0"

[dev-dependencies]
hipcheck-sdk = { path = "../../sdk/rust", features = ["mock_engine"] }
10 changes: 10 additions & 0 deletions plugins/binary/plugin.kdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
publisher "mitre"
name "binary"
version "0.1.0"
license "Apache-2.0"
entrypoint {
on arch="aarch64-apple-darwin" "./hc-mitre-binary"
on arch="x86_64-apple-darwin" "./hc-mitre-binary"
on arch="x86_64-unknown-linux-gnu" "./hc-mitre-binary"
on arch="x86_64-pc-windows-msvc" "./hc-mitre-binary"
}
177 changes: 177 additions & 0 deletions plugins/binary/src/binary_detector.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
// SPDX-License-Identifier: Apache-2.0

use crate::{
error::{Context, Result},
fs::read_toml,
};
use content_inspector::{inspect, ContentType};
use serde::{de::Visitor, Deserialize, Deserializer};
use std::{
fmt,
fmt::Formatter,
fs::File,
io::{prelude::Read, BufReader},
path::{Path, PathBuf},
result::Result as StdResult,
};
use walkdir::{DirEntry, WalkDir};

/// Classifies files as likely binaries by comparing their extension against a
/// configured list of known binary extensions.
#[derive(Debug, PartialEq, Eq)]
pub struct BinaryFileDetector {
/// Known binary extensions. Lookups prepend a `.` to the file's extension
/// before comparing, so entries only match when they carry a leading dot.
extensions: Vec<String>,
}

impl BinaryFileDetector {
/// Constructs a new `BinaryFileDetector` from the `Binary.toml` file.
pub fn load<P: AsRef<Path>>(binary_config_file: P) -> crate::error::Result<BinaryFileDetector> {
fn inner(binary_config_file: &Path) -> crate::error::Result<BinaryFileDetector> {
let extensions_file: ExtensionsFile = read_toml(binary_config_file)
.context("failed to read binary type defintions from Binary config file")?;

let extensions = extensions_file.into_extensions();

Ok(BinaryFileDetector { extensions })
}

inner(binary_config_file.as_ref())
}

/// Determines if a binary file matches a known file extension.
///
/// A match is assumed if an extension is not present.
pub fn is_likely_binary_file<P: AsRef<Path>>(&self, file_name: P) -> bool {
fn inner(binary_file_detector: &BinaryFileDetector, file_name: &Path) -> bool {
let extension = match file_name.extension() {
Some(e) => format!(".{}", e.to_string_lossy()),
None => return true,
};
for ext in &binary_file_detector.extensions {
if *ext == extension {
return true;
}
}
false
}
inner(self, file_name.as_ref())
}
}

/// Deserialized form of the Binary config file: a list of format entries.
#[derive(Debug, Deserialize)]
struct ExtensionsFile {
/// Every format entry found under the `formats` key.
formats: Vec<BinaryExtensions>,
}

/// A single format entry: its binary type plus the extensions listed for it.
#[derive(Debug, Deserialize)]
struct BinaryExtensions {
/// Kind of binary this entry describes; `missing_bin_type` supplies the
/// value when the entry has no `type` key.
#[serde(default = "missing_bin_type")]
r#type: BinaryType,
/// Extensions listed for this format, if any.
extensions: Option<Vec<String>>,
}

impl ExtensionsFile {
    /// Flattens the config into a single list of extensions.
    ///
    /// Only entries typed as object, combination, or executable contribute;
    /// entries with a missing type or without an extension list are skipped.
    /// Order follows the order of entries in the file.
    fn into_extensions(self) -> Vec<String> {
        self.formats
            .into_iter()
            .filter(|format| {
                matches!(
                    format.r#type,
                    BinaryType::Object | BinaryType::Combination | BinaryType::Executable
                )
            })
            .filter_map(|format| format.extensions)
            .flatten()
            .collect()
    }
}

/// Category assigned to a format entry in the Binary config file.
#[derive(Debug)]
enum BinaryType {
/// Parsed from the string `"object"`.
Object,
/// Parsed from the string `"executable"`.
Executable,
/// Parsed from the string `"combination"`.
Combination,
/// Placeholder for entries with no `type` key; never produced from a string.
Missing,
}

/// Serde default for `BinaryExtensions::type`: marks the entry's type as missing.
fn missing_bin_type() -> BinaryType {
BinaryType::Missing
}

impl<'de> Deserialize<'de> for BinaryType {
fn deserialize<D>(deserializer: D) -> StdResult<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_str(BinaryTypeVisitor)
}
}

struct BinaryTypeVisitor;

impl<'de> Visitor<'de> for BinaryTypeVisitor {
type Value = BinaryType;
fn expecting(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "'executable', 'object', or 'combination'")
}

fn visit_str<E>(self, value: &str) -> StdResult<Self::Value, E>
where
E: serde::de::Error,
{
match value {
"combination" => Ok(BinaryType::Combination),
"object" => Ok(BinaryType::Object),
"executable" => Ok(BinaryType::Executable),
_ => Err(serde::de::Error::custom("unknown binary format")),
}
}
}

/// Reports whether `entry` is hidden in the Unix sense, i.e. its file name
/// begins with a `.`.
///
/// File names that are not valid UTF-8 are treated as not hidden.
fn is_hidden(entry: &DirEntry) -> bool {
    matches!(entry.file_name().to_str(), Some(name) if name.starts_with('.'))
}

/// Fetches all non-hidden entries (files and directories) under `dir`,
/// including the entry for `dir` itself.
///
/// Hidden entries below the root are skipped, and hidden directories are not
/// descended into.
///
/// # Errors
///
/// Returns the first error encountered while walking the directory tree.
fn fetch_entries(dir: &Path) -> Result<Vec<DirEntry>> {
    let mut entries: Vec<DirEntry> = Vec::new();
    let walker = WalkDir::new(dir).into_iter();

    // `filter_entry` is also applied to the root (depth 0). Without the depth
    // check, a root whose file name starts with '.' (for example `.`) would be
    // treated as hidden and the walk would yield nothing at all.
    for entry in walker.filter_entry(|e| e.depth() == 0 || !is_hidden(e)) {
        entries.push(entry?);
    }

    Ok(entries)
}

/// Searches `dir` for binary files and returns their paths relative to `dir`.
///
/// Only the first `SAMPLE_SIZE` bytes of each regular file are inspected.
/// Hidden entries are excluded by `fetch_entries`.
///
/// # Errors
///
/// Returns an error if the directory walk fails, if a regular file cannot be
/// opened or read, or if an entry's path is not prefixed by `dir`.
pub fn detect_binary_files(dir: &Path) -> Result<Vec<PathBuf>> {
    // Inspect the first 4K of each file for telltale signs of binary data.
    const SAMPLE_SIZE: u64 = 4096;

    let path_entries = fetch_entries(dir)?;
    let mut possible_binary: Vec<PathBuf> = Vec::new();

    // Sample buffer, reused across files so it is allocated only once.
    let mut contents: Vec<u8> = Vec::new();

    for entry in path_entries {
        // Only regular files can be sampled. `Path::is_file` follows symlinks,
        // so this skips directories (neither text nor binary) as well as
        // dangling symlinks and special files, which would otherwise make
        // opening or reading fail and abort the whole scan.
        if !entry.path().is_file() {
            continue;
        }

        let working_file = File::open(entry.path())?;
        let reader = BufReader::new(working_file);

        contents.clear();
        reader.take(SAMPLE_SIZE).read_to_end(&mut contents)?;

        if inspect(&contents) == ContentType::BINARY {
            // Record the path relative to the directory being searched.
            possible_binary.push(entry.path().strip_prefix(dir)?.into());
        }
    }

    Ok(possible_binary)
}
Loading

0 comments on commit 0c4105c

Please sign in to comment.