Skip to content

Commit

Permalink
Merge pull request #2754 from lann/factors-fix-tests
Browse files Browse the repository at this point in the history
factors: Fix tests / CI
  • Loading branch information
lann authored Aug 26, 2024
2 parents 1cae51f + c5ce6f9 commit b9ed65a
Show file tree
Hide file tree
Showing 22 changed files with 1,234 additions and 3,386 deletions.
129 changes: 63 additions & 66 deletions crates/componentize/src/bugs.rs
Original file line number Diff line number Diff line change
@@ -1,66 +1,63 @@
use anyhow::bail;
use wasm_metadata::Producers;
use wasmparser::{Encoding, ExternalKind, Parser, Payload};
use crate::module_info::ModuleInfo;

/// Represents the detected likelihood of the allocation bug fixed in
/// https://github.com/WebAssembly/wasi-libc/pull/377 being present in a Wasm
/// module.
pub const EARLIEST_PROBABLY_SAFE_CLANG_VERSION: &str = "15.0.7";

/// This error represents the likely presence of the allocation bug fixed in
/// https://github.com/WebAssembly/wasi-libc/pull/377 in a Wasm module.
#[derive(Debug, PartialEq)]
pub enum WasiLibc377Bug {
ProbablySafe,
ProbablyUnsafe,
Unknown,
pub struct WasiLibc377Bug {
clang_version: Option<String>,
}

impl WasiLibc377Bug {
pub fn detect(module: &[u8]) -> anyhow::Result<Self> {
for payload in Parser::new(0).parse_all(module) {
match payload? {
Payload::Version { encoding, .. } if encoding != Encoding::Module => {
bail!("detection only applicable to modules");
}
Payload::ExportSection(reader) => {
for export in reader {
let export = export?;
if export.kind == ExternalKind::Func && export.name == "cabi_realloc" {
// `cabi_realloc` is a good signal that this module
// uses wit-bindgen, making it probably-safe.
tracing::debug!("Found cabi_realloc export");
return Ok(Self::ProbablySafe);
}
}
}
Payload::CustomSection(c) if c.name() == "producers" => {
let producers = Producers::from_bytes(c.data(), c.data_offset())?;
if let Some(clang_version) =
producers.get("processed-by").and_then(|f| f.get("clang"))
{
tracing::debug!(clang_version, "Parsed producers.processed-by.clang");

// Clang/LLVM version is a good proxy for wasi-sdk
// version; the allocation bug was fixed in wasi-sdk-18
// and LLVM was updated to 15.0.7 in wasi-sdk-19.
if let Some((major, minor, patch)) = parse_clang_version(clang_version) {
return if (major, minor, patch) >= (15, 0, 7) {
Ok(Self::ProbablySafe)
} else {
Ok(Self::ProbablyUnsafe)
};
} else {
tracing::warn!(
clang_version,
"Unexpected producers.processed-by.clang version"
);
}
}
}
_ => (),
/// Detects the likely presence of this bug.
pub fn check(module_info: &ModuleInfo) -> Result<(), Self> {
if module_info.probably_uses_wit_bindgen() {
// Modules built with wit-bindgen are probably safe.
return Ok(());
}
if let Some(clang_version) = &module_info.clang_version {
// Clang/LLVM version is a good proxy for wasi-sdk
// version; the allocation bug was fixed in wasi-sdk-18
// and LLVM was updated to 15.0.7 in wasi-sdk-19.
if let Some((major, minor, patch)) = parse_clang_version(clang_version) {
let earliest_safe =
parse_clang_version(EARLIEST_PROBABLY_SAFE_CLANG_VERSION).unwrap();
if (major, minor, patch) >= earliest_safe {
return Ok(());
} else {
return Err(Self {
clang_version: Some(clang_version.clone()),
});
};
} else {
tracing::warn!(
clang_version,
"Unexpected producers.processed-by.clang version"
);
}
}
Ok(Self::Unknown)
// If we can't assert that the module uses wit-bindgen OR was compiled
// with a new-enough wasi-sdk, conservatively assume it may be buggy.
Err(Self {
clang_version: None,
})
}
}

impl std::fmt::Display for WasiLibc377Bug {
    /// Writes the fixed, user-facing warning for this bug.
    ///
    /// The text is the same for every value: the stored clang version is not
    /// part of the message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The trailing backslashes join the pieces into a single line.
        const MESSAGE: &str = "This Wasm module may have been compiled with wasi-sdk version <19 which \
            contains a critical memory safety bug. For more information, see: \
            https://github.com/fermyon/spin/issues/2552";
        f.write_str(MESSAGE)
    }
}

// Empty impl: the default `std::error::Error` methods are sufficient. Having
// it lets a failed check be treated as a standard error value, e.g. propagated
// with `?` by callers that return `anyhow::Result`.
impl std::error::Error for WasiLibc377Bug {}

fn parse_clang_version(ver: &str) -> Option<(u16, u16, u16)> {
// Strip optional trailing detail after space
let ver = ver.split(' ').next().unwrap();
Expand All @@ -77,42 +74,42 @@ mod tests {

#[test]
fn wasi_libc_377_detect() {
use WasiLibc377Bug::*;
for (wasm, expected) in [
(r#"(module)"#, Unknown),
for (wasm, safe) in [
(r#"(module)"#, false),
(
r#"(module (func (export "cabi_realloc") (unreachable)))"#,
ProbablySafe,
true,
),
(
r#"(module (func (export "some_other_function") (unreachable)))"#,
Unknown,
false,
),
(
r#"(module (@producers (processed-by "clang" "16.0.0 extra-stuff")))"#,
ProbablySafe,
true,
),
(
r#"(module (@producers (processed-by "clang" "15.0.7")))"#,
ProbablySafe,
true,
),
(
r#"(module (@producers (processed-by "clang" "15.0.6")))"#,
ProbablyUnsafe,
false,
),
(
r#"(module (@producers (processed-by "clang" "14.0.0")))"#,
ProbablyUnsafe,
r#"(module (@producers (processed-by "clang" "14.0.0 extra-stuff")))"#,
false,
),
(
r#"(module (@producers (processed-by "clang" "a.b.c")))"#,
Unknown,
false,
),
] {
eprintln!("WAT: {wasm}");
let module = wat::parse_str(wasm).unwrap();
let detected = WasiLibc377Bug::detect(&module).unwrap();
assert_eq!(detected, expected);
let module_info = ModuleInfo::from_module(&module).unwrap();
let detected = WasiLibc377Bug::check(&module_info);
assert!(detected.is_ok() == safe, "{wasm} -> {detected:?}");
}
}
}
69 changes: 40 additions & 29 deletions crates/componentize/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
use {
anyhow::{anyhow, Context, Result},
convert::{IntoEntityType, IntoExportKind},
module_info::ModuleInfo,
std::{borrow::Cow, collections::HashSet},
wasm_encoder::{CustomSection, ExportSection, ImportSection, Module, RawSection},
wasmparser::{Encoding, Parser, Payload},
Expand All @@ -14,6 +15,7 @@ pub mod bugs;
#[cfg(test)]
mod abi_conformance;
mod convert;
mod module_info;

const SPIN_ADAPTER: &[u8] = include_bytes!(concat!(
env!("OUT_DIR"),
Expand Down Expand Up @@ -51,8 +53,9 @@ pub fn componentize_if_necessary(module_or_component: &[u8]) -> Result<Cow<[u8]>
}

pub fn componentize(module: &[u8]) -> Result<Vec<u8>> {
match WitBindgenVersion::from_module(module)? {
WitBindgenVersion::V0_2 => componentize_old_bindgen(module),
let module_info = ModuleInfo::from_module(module)?;
match WitBindgenVersion::detect(&module_info)? {
WitBindgenVersion::V0_2OrNone => componentize_old_module(module, &module_info),
WitBindgenVersion::GreaterThanV0_4 => componentize_new_bindgen(module),
WitBindgenVersion::Other(other) => Err(anyhow::anyhow!(
"cannot adapt modules created with wit-bindgen version {other}"
Expand All @@ -65,40 +68,36 @@ pub fn componentize(module: &[u8]) -> Result<Vec<u8>> {
#[derive(Debug)]
enum WitBindgenVersion {
GreaterThanV0_4,
V0_2,
V0_2OrNone,
Other(String),
}

impl WitBindgenVersion {
fn from_module(module: &[u8]) -> Result<Self> {
let (_, bindgen) = metadata::decode(module)?;
if let Some(producers) = bindgen.producers {
if let Some(processors) = producers.get("processed-by") {
let bindgen_version = processors.iter().find_map(|(key, value)| {
key.starts_with("wit-bindgen").then_some(value.as_str())
});
if let Some(v) = bindgen_version {
let mut parts = v.split('.');
let Some(major) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
return Ok(Self::Other(v.to_owned()));
};
let Some(minor) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
return Ok(Self::Other(v.to_owned()));
};
if (major == 0 && minor < 5) || major >= 1 {
return Ok(Self::Other(v.to_owned()));
}
// Either there should be no patch version or nothing after patch
if parts.next().is_none() || parts.next().is_none() {
return Ok(Self::GreaterThanV0_4);
} else {
return Ok(Self::Other(v.to_owned()));
}
fn detect(module_info: &ModuleInfo) -> Result<Self> {
if let Some(processors) = module_info.bindgen_processors() {
let bindgen_version = processors
.iter()
.find_map(|(key, value)| key.starts_with("wit-bindgen").then_some(value.as_str()));
if let Some(v) = bindgen_version {
let mut parts = v.split('.');
let Some(major) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
return Ok(Self::Other(v.to_owned()));
};
let Some(minor) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
return Ok(Self::Other(v.to_owned()));
};
if (major == 0 && minor < 5) || major >= 1 {
return Ok(Self::Other(v.to_owned()));
}
// Either there should be no patch version or nothing after patch
if parts.next().is_none() || parts.next().is_none() {
return Ok(Self::GreaterThanV0_4);
} else {
return Ok(Self::Other(v.to_owned()));
}
}
}

Ok(Self::V0_2)
Ok(Self::V0_2OrNone)
}
}

Expand All @@ -111,6 +110,18 @@ pub fn componentize_new_bindgen(module: &[u8]) -> Result<Vec<u8>> {
.encode()
}

/// Adapts a module that was *not* produced with wit-bindgen >= 0.5, which
/// means it used either an old wit-bindgen or no wit-bindgen at all.
///
/// A module that exports `_start` and shows no obvious sign of wit-bindgen is
/// treated as a likely old p1 command module: it must first pass
/// `bugs::WasiLibc377Bug::check` (whose error is returned as-is) and is then
/// handed to `componentize_command`. Every other module goes through
/// `componentize_old_bindgen`.
pub fn componentize_old_module(module: &[u8], module_info: &ModuleInfo) -> Result<Vec<u8>> {
    let likely_p1_command =
        module_info.has_start_export && !module_info.probably_uses_wit_bindgen();
    if !likely_p1_command {
        return componentize_old_bindgen(module);
    }

    // Only command modules are screened for the bug before being adapted.
    bugs::WasiLibc377Bug::check(module_info)?;
    componentize_command(module)
}

/// Modules produced with wit-bindgen 0.2 need more extensive adaption
pub fn componentize_old_bindgen(module: &[u8]) -> Result<Vec<u8>> {
let (module, exports) = retarget_imports_and_get_exports(ADAPTER_NAME, module)?;
Expand Down
111 changes: 111 additions & 0 deletions crates/componentize/src/module_info.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
use wasm_metadata::Producers;
use wasmparser::{Encoding, ExternalKind, Parser, Payload};
use wit_component::metadata::Bindgen;

// Export names of the canonical ABI realloc function. wit-bindgen has used
// both of these historically, so `ModuleInfo::from_module` accepts a function
// export with either name as the module's realloc export.
const CANONICAL_ABI_REALLOC_EXPORTS: &[&str] = &["cabi_realloc", "canonical_abi_realloc"];

/// Stores various bits of info parsed from a Wasm module that are relevant to
/// componentization.
///
/// Every field starts at its `Default` value and is filled in by
/// [`ModuleInfo::from_module`] as matching sections are encountered.
#[derive(Default)]
pub struct ModuleInfo {
/// Metadata decoded from a custom section whose name starts with
/// `component-type`. `None` if no such section was found or none decoded
/// successfully; if several decode, the last one is kept.
pub bindgen: Option<Bindgen>,
/// The `clang` entry of the `processed-by` field in the `producers` custom
/// section, if present.
pub clang_version: Option<String>,
/// Name of the function export matching one of the known canonical ABI
/// realloc names, if the module has one.
pub realloc_export: Option<String>,
/// Whether the module has a function export named `_start`.
pub has_start_export: bool,
}

impl ModuleInfo {
/// Parses info from the given binary module bytes.
pub fn from_module(module: &[u8]) -> anyhow::Result<Self> {
let mut info = Self::default();
for payload in Parser::new(0).parse_all(module) {
match payload? {
Payload::Version { encoding, .. } => {
anyhow::ensure!(
encoding == Encoding::Module,
"ModuleInfo::from_module is only applicable to Modules; got a {encoding:?}"
);
}
Payload::ExportSection(reader) => {
for export in reader {
let export = export?;
if export.kind == ExternalKind::Func {
if CANONICAL_ABI_REALLOC_EXPORTS.contains(&export.name) {
tracing::debug!(
"Found canonical ABI realloc export {:?}",
export.name
);
info.realloc_export = Some(export.name.to_string());
} else if export.name == "_start" {
tracing::debug!("Found _start export");
info.has_start_export = true;
}
}
}
}
Payload::CustomSection(c) => {
let section_name = c.name();
if section_name == "producers" {
let producers = Producers::from_bytes(c.data(), c.data_offset())?;
if let Some(clang_version) =
producers.get("processed-by").and_then(|f| f.get("clang"))
{
tracing::debug!(clang_version, "Parsed producers.processed-by.clang");
info.clang_version = Some(clang_version.to_string());
}
} else if section_name.starts_with("component-type") {
match decode_bindgen_custom_section(section_name, c.data()) {
Ok(bindgen) => {
tracing::debug!("Parsed bindgen section {section_name:?}");
info.bindgen = Some(bindgen);
}
Err(err) => tracing::warn!(
"Error parsing bindgen section {section_name:?}: {err}"
),
}
}
}
_ => (),
}
}
Ok(info)
}

/// Returns true if the given module was heuristically probably compiled
/// with wit-bindgen.
pub fn probably_uses_wit_bindgen(&self) -> bool {
if self.bindgen.is_some() {
// Presence of bindgen metadata is a strong signal
true
} else if self.realloc_export.is_some() {
// A canonical ABI realloc export is a decent signal
true
} else {
false
}
}

/// Returns the wit-bindgen metadata producers processed-by field, if
/// present.
pub fn bindgen_processors(&self) -> Option<wasm_metadata::ProducersField> {
self.bindgen
.as_ref()?
.producers
.as_ref()?
.get("processed-by")
}
}

/// Decodes the bindgen metadata held in a single custom section.
///
/// This is a workaround for the limited public interface of
/// [`wit_component::metadata`]: the section is re-encoded into a fresh module
/// that contains nothing else, and that module is passed to the public
/// `decode` entry point. Only the returned `Bindgen` is kept.
// TODO: Make Bindgen::decode_custom_section public?
fn decode_bindgen_custom_section(name: &str, data: &[u8]) -> anyhow::Result<Bindgen> {
    let section = wasm_encoder::CustomSection {
        name: name.into(),
        data: data.into(),
    };
    let mut wrapper = wasm_encoder::Module::new();
    wrapper.section(&section);

    let (_, bindgen) = wit_component::metadata::decode(wrapper.as_slice())?;
    Ok(bindgen)
}
Loading

0 comments on commit b9ed65a

Please sign in to comment.