diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index db1f8173..2046a6ee 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -18,16 +18,22 @@ jobs: run: sudo apt-get install -y lld - name: Install target -- i686-unknown-linux-musl run: rustup target add i686-unknown-linux-musl + - name: Install target -- x86_64-unknown-freebsd + run: rustup target add x86_64-unknown-freebsd - name: Build and test (generic) run: ./ci/jobs/build-and-test.sh - name: Build and test (Linux-only) run: ./ci/jobs/build-and-test-linux.sh + - name: Check (FreeBSD) + run: ./ci/jobs/check-freebsd.sh build-and-test-macos: runs-on: macos-latest steps: - uses: actions/checkout@v4 - name: Build and test run: ./ci/jobs/build-and-test.sh + - name: Build and test (macOS-only) + run: ./ci/jobs/build-and-test-macos.sh build-and-test-windows: runs-on: windows-latest steps: diff --git a/Cargo.lock b/Cargo.lock index ac61b47f..e16ed172 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -194,12 +194,30 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961" +[[package]] +name = "cmake" +version = "0.1.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +dependencies = [ + "cc", +] + [[package]] name = "colorchoice" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "doom-host" +version = "0.1.0" +dependencies = [ + "env_logger", + "polkavm", + "sdl2", +] + [[package]] name = "either" version = "1.9.0" @@ -452,10 +470,11 @@ name = "polkavm" version = "0.2.0" dependencies = [ "env_logger", + "libc", "log", "polkavm-assembler", "polkavm-common", - "polkavm-linux-sandbox", + "polkavm-linux-raw", ] [[package]] @@ -515,17 +534,6 @@ dependencies = [ "bindgen", ] -[[package]] -name = "polkavm-linux-sandbox" -version = "0.2.0" -dependencies = [ - "env_logger", - "log", - "polkavm-assembler", - "polkavm-common", - "polkavm-linux-raw", -] - [[package]] name = "ppv-lite86" version = "0.2.17" @@ -707,6 +715,30 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "sdl2" +version = "0.35.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7959277b623f1fb9e04aea73686c3ca52f01b2145f8ea16f4ff30d8b7623b1a" +dependencies = [ + "bitflags 1.3.2", + "lazy_static", + "libc", + "sdl2-sys", +] + +[[package]] +name = "sdl2-sys" +version = "0.35.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3586be2cf6c0a8099a79a12b4084357aa9b3e0b0d7980e3b67aaf7a9d55f9f0" +dependencies = [ + "cfg-if", + "cmake", + "libc", + "version-compare", +] + [[package]] name = "shlex" version = "1.1.0" @@ -768,6 +800,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "version-compare" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "579a42fc0b8e0c63b76519a339be31bed574929511fa53c1a3acae26eb258f29" + [[package]] name = "wait-timeout" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index d56cddb3..75f18820 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,13 +6,13 @@ members = [ "crates/polkavm-assembler", "crates/polkavm-common", "crates/polkavm-linux-raw", - "crates/polkavm-linux-sandbox", "crates/polkavm", "tools/polkatool", "tools/polkavm-linux-raw-generate", "examples/hosts/hello-world", + "examples/hosts/doom", ] [workspace.package] @@ -31,13 +31,13 @@ polkavm-derive = { version = "0.2.0", path = "crates/polkavm-derive" } polkavm-derive-impl = { version = "0.2.0", path = "crates/polkavm-derive-impl" } polkavm-linker = { version = "0.2.0", path = "crates/polkavm-linker" } polkavm-linux-raw = { version = "0.2.0", path = "crates/polkavm-linux-raw" } -polkavm-linux-sandbox = { version = "0.2.0", path = "crates/polkavm-linux-sandbox" } clap = "4.4.1" env_logger = { version = "0.10.0", default-features = false } gimli = { version = "0.28.0", default-features = false } hashbrown = { version = "0.14.0", default-features = false } iced-x86 = "1.19.0" +libc = "0.2.146" log = "0.4.20" object = { version = "0.32.0", default-features = false } proc-macro2 = "1.0.63" diff --git a/ci/jobs/build-and-test-linux.sh b/ci/jobs/build-and-test-linux.sh index 020a0aaa..28faf08b 100755 --- a/ci/jobs/build-and-test-linux.sh +++ b/ci/jobs/build-and-test-linux.sh @@ -9,5 +9,14 @@ cd crates/polkavm-zygote cargo build --release cd ../.. -echo ">> cargo run (examples, musl)" -POLKAVM_TRACE_EXECUTION=1 POLKAVM_ALLOW_INSECURE=1 cargo run --target=i686-unknown-linux-musl -p hello-world-host +echo ">> cargo run (examples, interpreter, i686-unknown-linux-musl)" +POLKAVM_TRACE_EXECUTION=1 POLKAVM_ALLOW_INSECURE=1 POLKAVM_BACKEND=interpreter cargo run --target=i686-unknown-linux-musl -p hello-world-host + +echo ">> cargo run (examples, interpreter, x86_64-unknown-linux-gnu)" +POLKAVM_TRACE_EXECUTION=1 POLKAVM_ALLOW_INSECURE=1 POLKAVM_BACKEND=interpreter cargo run --target=x86_64-unknown-linux-gnu -p hello-world-host + +echo ">> cargo run (examples, compiler, linux, x86_64-unknown-linux-gnu)" +POLKAVM_TRACE_EXECUTION=1 POLKAVM_ALLOW_INSECURE=1 POLKAVM_BACKEND=compiler POLKAVM_SANDBOX=linux cargo run --target=x86_64-unknown-linux-gnu -p hello-world-host + +echo ">> cargo run (examples, compiler, generic, x86_64-unknown-linux-gnu)" +POLKAVM_TRACE_EXECUTION=1 POLKAVM_ALLOW_INSECURE=1 POLKAVM_BACKEND=compiler POLKAVM_SANDBOX=generic cargo run --target=x86_64-unknown-linux-gnu -p hello-world-host diff --git a/ci/jobs/build-and-test-macos.sh b/ci/jobs/build-and-test-macos.sh new file mode 100755 index 00000000..4d1fc092 --- /dev/null +++ b/ci/jobs/build-and-test-macos.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -euo pipefail +cd -- "$(dirname -- "${BASH_SOURCE[0]}")" +cd ../.. + +echo ">> cargo run (examples, interpreter, x86_64-apple-darwin)" +POLKAVM_TRACE_EXECUTION=1 POLKAVM_ALLOW_INSECURE=1 POLKAVM_BACKEND=interpreter cargo run --target=x86_64-apple-darwin -p hello-world-host + +echo ">> cargo run (examples, compiler, generic, x86_64-apple-darwin)" +POLKAVM_TRACE_EXECUTION=1 POLKAVM_ALLOW_INSECURE=1 POLKAVM_BACKEND=compiler POLKAVM_SANDBOX=generic cargo run --target=x86_64-apple-darwin -p hello-world-host diff --git a/ci/jobs/check-freebsd.sh b/ci/jobs/check-freebsd.sh new file mode 100755 index 00000000..ccd35a8e --- /dev/null +++ b/ci/jobs/check-freebsd.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -euo pipefail +cd -- "$(dirname -- "${BASH_SOURCE[0]}")" +cd ../.. + +echo ">> cargo check (freebsd)" +cd crates/polkavm +cargo check --target=x86_64-unknown-freebsd +cd ../.. diff --git a/ci/run-all-tests.sh b/ci/run-all-tests.sh index 805b3505..ed97464b 100755 --- a/ci/run-all-tests.sh +++ b/ci/run-all-tests.sh @@ -14,7 +14,14 @@ fi case "$OSTYPE" in linux*) ./ci/jobs/build-and-test-linux.sh + ;; + darwin*) + ./ci/jobs/build-and-test-macos.sh + ;; esac + +./ci/jobs/check-freebsd.sh + ./ci/jobs/clippy.sh ./ci/jobs/rustfmt.sh diff --git a/crates/polkavm-assembler/src/amd64.rs b/crates/polkavm-assembler/src/amd64.rs index 6fa56cbf..f8249b8f 100644 --- a/crates/polkavm-assembler/src/amd64.rs +++ b/crates/polkavm-assembler/src/amd64.rs @@ -1197,6 +1197,30 @@ pub mod inst { Some((self.1, 2, 4)) } } + + #[derive(Copy, Clone, PartialEq, Eq, Debug)] + pub struct lea_rip_label(pub Reg, pub Label); + impl lea_rip_label { + const fn encode_const(self) -> EncInst { + lea_rip(RegSize::R64, self.0, 0).encode_const() + } + } + + impl core::fmt::Display for lea_rip_label { + fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result { + fmt.write_fmt(core::format_args!("lea {}, [{}]", self.0, self.1)) + } + } + + impl crate::Instruction for lea_rip_label { + fn encode(self) -> EncInst { + self.encode_const() + } + + fn target_fixup(self) -> Option<(Label, u8, u8)> { + Some((self.1, 3, 4)) + } + } } #[derive(Copy, Clone, PartialEq, Eq, Debug)] @@ -1693,4 +1717,25 @@ mod tests { ); }); } + + #[test] + fn lea_rip_label_infinite_loop() { + use super::inst::*; + let mut asm = crate::Assembler::new(); + let label = asm.forward_declare_label(); + asm.push_with_label(label, lea_rip_label(super::Reg::rax, label)); + let disassembly = disassemble(asm.finalize()); + assert_eq!(disassembly, "00000000 488d05f9ffffff lea rax, [rip-0x7]"); + } + + #[test] + fn lea_rip_label_next_instruction() { + use super::inst::*; + let mut asm = crate::Assembler::new(); + let label = asm.forward_declare_label(); + asm.push(lea_rip_label(super::Reg::rax, label)); + asm.push_with_label(label, nop()); + let disassembly = disassemble(asm.finalize()); + assert_eq!(disassembly, "00000000 488d0500000000 lea rax, [rip]\n00000007 90 nop"); + } } diff --git a/crates/polkavm-assembler/src/assembler.rs b/crates/polkavm-assembler/src/assembler.rs index 7987e0ec..ced24022 100644 --- a/crates/polkavm-assembler/src/assembler.rs +++ b/crates/polkavm-assembler/src/assembler.rs @@ -10,7 +10,7 @@ struct Fixup { pub struct Assembler { origin: u64, code: Vec, - labels: Vec, + labels: Vec, fixups: Vec, } @@ -41,19 +41,19 @@ impl Assembler { pub fn forward_declare_label(&mut self) -> Label { let label = self.labels.len(); - self.labels.push(usize::MAX); + self.labels.push(isize::MAX); Label(label) } pub fn create_label(&mut self) -> Label { let label = self.labels.len(); - self.labels.push(self.code.len()); + self.labels.push(self.code.len() as isize); Label(label) } pub fn define_label(&mut self, label: Label) -> &mut Self { - assert_eq!(self.labels[label.0], usize::MAX, "tried to redefine an already defined label"); - self.labels[label.0] = self.code.len(); + assert_eq!(self.labels[label.0], isize::MAX, "tried to redefine an already defined label"); + self.labels[label.0] = self.code.len() as isize; self } @@ -62,12 +62,16 @@ impl Assembler { self.push(inst) } - pub fn get_label_offset(&self, label: Label) -> usize { + pub fn get_label_offset(&self, label: Label) -> isize { let offset = self.labels[label.0]; - assert_ne!(offset, usize::MAX, "tried to fetch a label offset for a label that was not defined"); + assert_ne!(offset, isize::MAX, "tried to fetch a label offset for a label that was not defined"); offset } + pub fn set_label_offset(&mut self, label: Label, offset: isize) { + self.labels[label.0] = offset; + } + fn add_fixup_if_necessary(&mut self, bytes: &[u8], inst: impl Instruction) { let (target_label, fixup_offset, fixup_length) = match inst.target_fixup() { Some(fixup) => fixup, @@ -106,8 +110,8 @@ impl Assembler { for fixup in self.fixups.drain(..) { let origin = fixup.instruction_offset + fixup.instruction_length as usize; let target_absolute = self.labels[fixup.target_label.0]; - assert_ne!(target_absolute, usize::MAX); - let offset = target_absolute as isize - origin as isize; + assert_ne!(target_absolute, isize::MAX); + let offset = target_absolute - origin as isize; let p = fixup.instruction_offset + fixup.fixup_offset as usize; if fixup.fixup_length == 1 { if offset > i8::MAX as isize || offset < i8::MIN as isize { @@ -134,6 +138,10 @@ impl Assembler { self.code.len() } + pub fn resize(&mut self, size: usize, fill_with: u8) { + self.code.resize(size, fill_with) + } + pub fn clear(&mut self) { self.origin = 0; self.code.clear(); diff --git a/crates/polkavm-common/src/abi.rs b/crates/polkavm-common/src/abi.rs index c35d18f1..921ef75c 100644 --- a/crates/polkavm-common/src/abi.rs +++ b/crates/polkavm-common/src/abi.rs @@ -143,19 +143,21 @@ impl GuestMemoryConfig { /// The address at where the program memory starts inside of the VM. #[inline] - pub const fn user_memory_address(self) -> u32 { + pub const fn user_memory_region_address(self) -> u32 { VM_ADDR_USER_MEMORY } - /// The size of the program memory inside of the VM, excluding the stack. + /// The size of the region in which the program memory resides inside of the VM, excluding the stack. + /// + /// This also includes the guard page between the read-only data and read-write data. #[inline] - pub const fn user_memory_size(self) -> u32 { - self.ro_data_size + self.rw_data_size + self.bss_size + pub const fn user_memory_region_size(self) -> u32 { + (self.bss_address() + self.bss_size()) - self.user_memory_region_address() } /// Resets the size of the program memory to zero, excluding the stack. #[inline] - pub fn clear_user_memory_size(&mut self) { + pub fn clear_user_memory_sizes(&mut self) { self.ro_data_size = 0; self.rw_data_size = 0; self.bss_size = 0; @@ -164,7 +166,7 @@ impl GuestMemoryConfig { /// The address at where the program's read-only data starts inside of the VM. #[inline] pub const fn ro_data_address(self) -> u32 { - self.user_memory_address() + self.user_memory_region_address() } /// The size of the program's read-only data. @@ -199,7 +201,7 @@ impl GuestMemoryConfig { #[inline] pub const fn rw_data_address(self) -> u32 { if self.ro_data_size == 0 { - self.user_memory_address() + self.user_memory_region_address() } else { self.ro_data_address() + self.ro_data_size + VM_PAGE_SIZE } diff --git a/crates/polkavm-common/src/program.rs b/crates/polkavm-common/src/program.rs index a713e735..9c0d8358 100644 --- a/crates/polkavm-common/src/program.rs +++ b/crates/polkavm-common/src/program.rs @@ -1625,7 +1625,7 @@ impl<'a> LineProgram<'a> { } // Put an upper limit to how many instructions we'll process. - const INSTRUCTION_LIMIT_PER_REGION: usize = 128; + const INSTRUCTION_LIMIT_PER_REGION: usize = 256; let mark_as_finished_on_drop = SetTrueOnDrop(&mut self.is_finished); for _ in 0..INSTRUCTION_LIMIT_PER_REGION { diff --git a/crates/polkavm-common/src/utils.rs b/crates/polkavm-common/src/utils.rs index 5fba9979..dfeed291 100644 --- a/crates/polkavm-common/src/utils.rs +++ b/crates/polkavm-common/src/utils.rs @@ -182,7 +182,7 @@ impl AsUninitSliceMut for [u8; N] { } pub trait Access<'a> { - type Error; + type Error: core::fmt::Display; fn get_reg(&self, reg: Reg) -> u32; fn set_reg(&mut self, reg: Reg, value: u32); diff --git a/crates/polkavm-common/src/zygote.rs b/crates/polkavm-common/src/zygote.rs index bec10612..c601403b 100644 --- a/crates/polkavm-common/src/zygote.rs +++ b/crates/polkavm-common/src/zygote.rs @@ -176,11 +176,6 @@ pub const VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION: u32 = 1 << 1; /// A flag which will trigger the sandbox to unload its program after execution. pub const VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION: u32 = 1 << 2; -/// A flag which will trigger the sandbox to send a SIGSTOP to itself before execution. -/// -/// Mostly useful for debugging. -pub const VM_RPC_FLAG_SIGSTOP_BEFORE_EXECUTION: u32 = 1 << 3; - #[repr(C)] pub struct VmInit { pub stack_address: AtomicU64, @@ -194,7 +189,7 @@ pub struct VmInit { const MESSAGE_BUFFER_SIZE: usize = 512; #[repr(align(64))] -pub struct CacheAligned(T); +pub struct CacheAligned(pub T); impl core::ops::Deref for CacheAligned { type Target = T; diff --git a/crates/polkavm-linker/src/dwarf.rs b/crates/polkavm-linker/src/dwarf.rs index 6a36fb94..50b549f1 100644 --- a/crates/polkavm-linker/src/dwarf.rs +++ b/crates/polkavm-linker/src/dwarf.rs @@ -1,9 +1,10 @@ use crate::elf::{Elf, Section, SectionIndex}; use crate::fast_range_map::RangeMap; use crate::program_from_elf::{AddressRange, RelocationKind, RelocationSize, SectionTarget, SizeRelocationSize, Source}; +use crate::reader_wrapper::ReaderWrapper; use crate::utils::StringCache; use crate::ProgramFromElfError; -use gimli::{Reader, ReaderOffset}; +use gimli::{LineInstruction, Reader, ReaderOffset}; use polkavm_common::program::FrameKind; use std::collections::{BTreeMap, HashMap}; use std::sync::Arc; @@ -54,6 +55,118 @@ struct AttributeParser { recursion_limit: usize, } +fn parse_ranges( + sections: &Sections, + relocations: &BTreeMap, + unit: &gimli::Unit, + mut base: Option, + ranges_offset: gimli::RangeListsOffset<::Offset>, + mut callback: impl FnMut(Source), +) -> Result<(), ProgramFromElfError> +where + R: gimli::Reader, +{ + if unit.encoding().version <= 4 { + let Some(section) = sections.debug_ranges else { + return Err(ProgramFromElfError::other( + "failed to process DWARF: missing '.debug_ranges' section", + )); + }; + + let mut reader = gimli::read::EndianSlice::new(section.data(), gimli::LittleEndian); + let start = reader; + reader.skip(ranges_offset.0.into_u64() as usize)?; + + let address_size = unit.encoding().address_size; + let offset_start = reader.offset_from(start); + let _ = reader.read_address(address_size)?; + let offset_end = reader.offset_from(start); + let _ = reader.read_address(address_size)?; + + let relocation_start = SectionTarget { + section_index: section.index(), + offset: offset_start.into_u64(), + }; + + let relocation_end = SectionTarget { + section_index: section.index(), + offset: offset_end.into_u64(), + }; + + let (start_section, start_range) = fetch_size_relocation(relocations, relocation_start)?; + let (end_section, end_range) = fetch_size_relocation(relocations, relocation_end)?; + + if start_section != end_section { + return Err(ProgramFromElfError::other( + "failed to process DWARF: '.debug_ranges' has a pair of relocations pointing to different sections", + )); + } + + let source = Source { + section_index: start_section, + offset_range: (start_range.end..end_range.end).into(), + }; + + log::trace!(" Range from debug ranges: {}", source); + callback(source); + } else { + let Some(section) = sections.debug_rnglists else { + return Err(ProgramFromElfError::other( + "failed to process DWARF: missing '.debug_rnglists' section", + )); + }; + + let mut reader = gimli::read::EndianSlice::new(section.data(), gimli::LittleEndian); + reader.skip(ranges_offset.0.into_u64() as usize)?; + + loop { + let kind = gimli::constants::DwRle(reader.read_u8()?); + match kind { + gimli::constants::DW_RLE_end_of_list => break, + gimli::constants::DW_RLE_offset_pair => { + let offset_start = reader.read_uleb128()?; + let offset_end = reader.read_uleb128()?; + if let Some(base) = base { + let source = Source { + section_index: base.section_index, + offset_range: (base.offset + offset_start..base.offset + offset_end).into(), + }; + + log::trace!(" Range from low_pc + high_pc (rel): {}", source); + callback(source); + } else if false { + return Err(ProgramFromElfError::other( + "failed to process DWARF: found DW_RLE_offset_pair yet we have no base address", + )); + } + } + gimli::constants::DW_RLE_startx_length => { + let begin = gimli::DebugAddrIndex(reader.read_uleb128().and_then(R::Offset::from_u64)?); + let length = reader.read_uleb128()?; + if let Some(target) = resolve_debug_addr_index(sections.debug_addr, relocations, unit, begin)? { + let source = Source { + section_index: target.section_index, + offset_range: (target.offset..target.offset + length).into(), + }; + callback(source) + } + } + gimli::constants::DW_RLE_base_addressx => { + let begin = gimli::DebugAddrIndex(reader.read_uleb128().and_then(R::Offset::from_u64)?); + base = resolve_debug_addr_index(sections.debug_addr, relocations, unit, begin)?; + } + _ => { + return Err(ProgramFromElfError::other(format!( + "failed to process DWARF: unhandled entry kind in '.debug_rnglists': {kind}" + ))); + } + } + } + } + + Ok(()) +} + impl AttributeParser { fn new(depth: usize) -> Self { AttributeParser { @@ -83,82 +196,7 @@ impl AttributeParser { mut callback: impl FnMut(Source), ) -> Result<(), ProgramFromElfError> { if let Some(ranges_offset) = self.ranges_offset { - if unit.raw_unit.encoding().version <= 4 { - let Some(section) = sections.debug_ranges else { - return Err(ProgramFromElfError::other( - "failed to process DWARF: missing '.debug_ranges' section", - )); - }; - - let mut reader = gimli::read::EndianSlice::new(section.data(), gimli::LittleEndian); - let start = reader; - reader.skip(ranges_offset.0.into_u64() as usize)?; - - let address_size = unit.raw_unit.encoding().address_size; - let offset_start = reader.offset_from(start); - let _ = reader.read_address(address_size)?; - let offset_end = reader.offset_from(start); - let _ = reader.read_address(address_size)?; - - let relocation_start = SectionTarget { - section_index: section.index(), - offset: offset_start.into_u64(), - }; - - let relocation_end = SectionTarget { - section_index: section.index(), - offset: offset_end.into_u64(), - }; - - let (start_section, start_range) = fetch_size_relocation(relocations, relocation_start)?; - let (end_section, end_range) = fetch_size_relocation(relocations, relocation_end)?; - - if start_section != end_section { - return Err(ProgramFromElfError::other( - "failed to process DWARF: '.debug_ranges' has a pair of relocations pointing to different sections", - )); - } - - let source = Source { - section_index: start_section, - offset_range: (start_range.end..end_range.end).into(), - }; - - log::trace!(" Range from debug ranges: {}", source); - callback(source); - } else { - let Some(section) = sections.debug_rnglists else { - return Err(ProgramFromElfError::other( - "failed to process DWARF: missing '.debug_rnglists' section", - )); - }; - - let mut reader = gimli::read::EndianSlice::new(section.data(), gimli::LittleEndian); - reader.skip(ranges_offset.0.into_u64() as usize)?; - - loop { - let kind = gimli::constants::DwRle(reader.read_u8()?); - match kind { - gimli::constants::DW_RLE_end_of_list => break, - gimli::constants::DW_RLE_offset_pair => { - let offset_start = reader.read_uleb128()?; - let offset_end = reader.read_uleb128()?; - let source = Source { - section_index: unit.low_pc.section_index, - offset_range: (unit.low_pc.offset + offset_start..unit.low_pc.offset + offset_end).into(), - }; - - log::trace!(" Range from low_pc + high_pc (rel): {}", source); - callback(source); - } - _ => { - return Err(ProgramFromElfError::other(format!( - "failed to process DWARF: unhandled entry kind in '.debug_rnglists': {kind}" - ))); - } - } - } - }; + parse_ranges::(sections, relocations, &unit.raw_unit, unit.low_pc, ranges_offset, callback)?; } else if let (Some(low_pc), Some(high_pc)) = (self.low_pc, self.high_pc) { if low_pc.section_index != high_pc.section_index { return Err(ProgramFromElfError::other( @@ -209,16 +247,16 @@ impl AttributeParser { dwarf: &gimli::Dwarf, unit: &Unit, name: gimli::constants::DwAt, - value: ValueOrOffset, + value: AttributeValue, ) -> Result<(), ProgramFromElfError> { log::trace!("{:->depth$}{name}", ">", depth = self.depth); - struct UnsupportedValue(ValueOrOffset) + struct UnsupportedValue(AttributeValue) where R: gimli::Reader; match name { gimli::DW_AT_low_pc => match value.clone() { - ValueOrOffset::Offset(offset) => { + AttributeValue { offset: Some(offset), .. } => { let relocation_target = SectionTarget { section_index: sections.debug_info.index(), offset: offset.into_u64(), @@ -230,16 +268,23 @@ impl AttributeParser { Ok(()) } - ValueOrOffset::Value(gimli::AttributeValue::DebugAddrIndex(index)) => { - let value = resolve_debug_addr_index(sections.debug_addr, relocations, &unit.raw_unit, index)?; - self.low_pc = Some(value); - log::trace!(" = {value} ({index:?})"); + AttributeValue { + value: gimli::AttributeValue::DebugAddrIndex(index), + .. + } => { + self.low_pc = resolve_debug_addr_index(sections.debug_addr, relocations, &unit.raw_unit, index)?; + if let Some(value) = self.low_pc { + log::trace!(" = {value} ({index:?})"); + } else { + log::trace!(" = None ({index:?})"); + } + Ok(()) } _ => Err(UnsupportedValue(value)), }, gimli::DW_AT_high_pc => match value { - ValueOrOffset::Offset(offset) => { + AttributeValue { offset: Some(offset), .. } => { let relocation_target = SectionTarget { section_index: sections.debug_info.index(), offset: offset.into_u64(), @@ -251,18 +296,31 @@ impl AttributeParser { Ok(()) } - ValueOrOffset::Value(gimli::AttributeValue::DebugAddrIndex(index)) => { - let value = resolve_debug_addr_index(sections.debug_addr, relocations, &unit.raw_unit, index)?; - self.high_pc = Some(value); - log::trace!(" = {value} ({index:?})"); + AttributeValue { + value: gimli::AttributeValue::DebugAddrIndex(index), + .. + } => { + self.high_pc = resolve_debug_addr_index(sections.debug_addr, relocations, &unit.raw_unit, index)?; + if let Some(value) = self.high_pc { + log::trace!(" = {value} ({index:?})"); + } else { + log::trace!(" = None ({index:?})"); + } + Ok(()) } - ValueOrOffset::Value(gimli::AttributeValue::Udata(value)) => { + AttributeValue { + value: gimli::AttributeValue::Udata(value), + .. + } => { log::trace!(" = DW_AT_low_pc + {value} (size/udata)"); self.size = Some(value); Ok(()) } - ValueOrOffset::Value(gimli::AttributeValue::Data4(value)) => { + AttributeValue { + value: gimli::AttributeValue::Data4(value), + .. + } => { log::trace!(" = DW_AT_low_pc + {value} (size/data4)"); self.size = Some(value as u64); Ok(()) @@ -270,22 +328,31 @@ impl AttributeParser { _ => Err(UnsupportedValue(value)), }, gimli::DW_AT_ranges => match value { - ValueOrOffset::Value(gimli::AttributeValue::RangeListsRef(offset)) => { + AttributeValue { + value: gimli::AttributeValue::RangeListsRef(offset), + .. + } => { self.ranges_offset = Some(dwarf.ranges_offset_from_raw(&unit.raw_unit, offset)); Ok(()) } - ValueOrOffset::Value(gimli::AttributeValue::DebugRngListsIndex(index)) => { + AttributeValue { + value: gimli::AttributeValue::DebugRngListsIndex(index), + .. + } => { self.ranges_offset = Some(dwarf.ranges_offset(&unit.raw_unit, index)?); Ok(()) } - ValueOrOffset::Value(gimli::AttributeValue::SecOffset(offset)) => { + AttributeValue { + value: gimli::AttributeValue::SecOffset(offset), + .. + } => { self.ranges_offset = Some(dwarf.ranges_offset_from_raw(&unit.raw_unit, gimli::RawRangeListsOffset(offset))); Ok(()) } _ => Err(UnsupportedValue(value)), }, gimli::DW_AT_linkage_name | gimli::DW_AT_MIPS_linkage_name => { - if let ValueOrOffset::Value(value) = value { + if let AttributeValue { value, offset: None } = value { self.linkage_name = Some(value); Ok(()) } else { @@ -293,7 +360,7 @@ impl AttributeParser { } } gimli::DW_AT_name => { - if let ValueOrOffset::Value(value) = value { + if let AttributeValue { value, offset: None } = value { self.name = Some(value); Ok(()) } else { @@ -305,11 +372,17 @@ impl AttributeParser { log::trace!(" = {:?}", value); match value { - ValueOrOffset::Value(gimli::AttributeValue::UnitRef(offset)) => { + AttributeValue { + value: gimli::AttributeValue::UnitRef(offset), + .. + } => { self.abstract_origin = Some(offset.to_debug_info_offset(&unit.raw_unit.header).unwrap()); Ok(()) } - ValueOrOffset::Value(gimli::AttributeValue::DebugInfoRef(target_offset)) => { + AttributeValue { + value: gimli::AttributeValue::DebugInfoRef(target_offset), + .. + } => { self.abstract_origin = Some(target_offset); Ok(()) } @@ -317,29 +390,45 @@ impl AttributeParser { } } gimli::DW_AT_decl_file => match value { - ValueOrOffset::Value(gimli::AttributeValue::FileIndex(index)) => { + AttributeValue { + value: gimli::AttributeValue::FileIndex(index), + .. + } => { self.decl_file = Some(index as usize); Ok(()) } - ValueOrOffset::Value(gimli::AttributeValue::Data1(index)) => { + AttributeValue { + value: gimli::AttributeValue::Data1(index), + .. + } => { self.decl_file = Some(index as usize); Ok(()) } _ => Err(UnsupportedValue(value)), }, gimli::DW_AT_call_file => match value { - ValueOrOffset::Value(gimli::AttributeValue::FileIndex(index)) => { + AttributeValue { + value: gimli::AttributeValue::FileIndex(index), + .. + } => { self.call_file = Some(index as usize); Ok(()) } - ValueOrOffset::Value(gimli::AttributeValue::Data1(index)) => { + AttributeValue { + value: gimli::AttributeValue::Data1(index), + .. + } => { self.call_file = Some(index as usize); Ok(()) } _ => Err(UnsupportedValue(value)), }, gimli::DW_AT_decl_line => { - if let ValueOrOffset::Value(ref inner) = value { + if let AttributeValue { + value: ref inner, + offset: None, + } = value + { if let Some(value) = inner.udata_value() { self.decl_line = Some(value as u32); Ok(()) @@ -351,7 +440,11 @@ impl AttributeParser { } } gimli::DW_AT_call_line => { - if let ValueOrOffset::Value(ref inner) = value { + if let AttributeValue { + value: ref inner, + offset: None, + } = value + { if let Some(value) = inner.udata_value() { self.call_line = Some(value as u32); Ok(()) @@ -363,7 +456,11 @@ impl AttributeParser { } } gimli::DW_AT_call_column => { - if let ValueOrOffset::Value(ref inner) = value { + if let AttributeValue { + value: ref inner, + offset: None, + } = value + { if let Some(value) = inner.udata_value() { self.call_column = Some(value as u32); Ok(()) @@ -375,7 +472,10 @@ impl AttributeParser { } } gimli::DW_AT_declaration => match value { - ValueOrOffset::Value(gimli::AttributeValue::Flag(value)) => { + AttributeValue { + value: gimli::AttributeValue::Flag(value), + .. + } => { self.is_declaration = value; Ok(()) } @@ -459,16 +559,15 @@ struct Sections<'a> { debug_addr: Option<&'a Section<'a>>, debug_ranges: Option<&'a Section<'a>>, debug_rnglists: Option<&'a Section<'a>>, + debug_line: Option<&'a Section<'a>>, } -fn fetch_relocation( +fn try_fetch_relocation( relocations: &BTreeMap, relocation_target: SectionTarget, -) -> Result { +) -> Result, ProgramFromElfError> { let Some(relocation) = relocations.get(&relocation_target) else { - return Err(ProgramFromElfError::other(format!( - "failed to process DWARF: {relocation_target} has no relocation" - ))); + return Ok(None); }; let RelocationKind::Abs { @@ -481,23 +580,34 @@ fn fetch_relocation( ))); }; - Ok(*target) + Ok(Some(*target)) } -fn fetch_size_relocation( +fn fetch_relocation( relocations: &BTreeMap, relocation_target: SectionTarget, -) -> Result<(SectionIndex, AddressRange), ProgramFromElfError> { - let Some(relocation) = relocations.get(&relocation_target) else { - return Err(ProgramFromElfError::other(format!( +) -> Result { + if let Some(target) = try_fetch_relocation(relocations, relocation_target)? { + Ok(target) + } else { + Err(ProgramFromElfError::other(format!( "failed to process DWARF: {relocation_target} has no relocation" - ))); + ))) + } +} + +fn try_fetch_size_relocation( + relocations: &BTreeMap, + relocation_target: SectionTarget, +) -> Result, ProgramFromElfError> { + let Some(relocation) = relocations.get(&relocation_target) else { + return Ok(None); }; let RelocationKind::Size { section_index, range, - size: SizeRelocationSize::Generic(RelocationSize::U32), + size: SizeRelocationSize::Generic(..), } = relocation else { return Err(ProgramFromElfError::other(format!( @@ -505,7 +615,20 @@ fn fetch_size_relocation( ))); }; - Ok((*section_index, *range)) + Ok(Some((*section_index, *range))) +} + +fn fetch_size_relocation( + relocations: &BTreeMap, + relocation_target: SectionTarget, +) -> Result<(SectionIndex, AddressRange), ProgramFromElfError> { + if let Some(target) = try_fetch_size_relocation(relocations, relocation_target)? { + Ok(target) + } else { + Err(ProgramFromElfError::other(format!( + "failed to process DWARF: {relocation_target} has no relocation" + ))) + } } fn resolve_debug_addr_index( @@ -513,7 +636,7 @@ fn resolve_debug_addr_index( relocations: &BTreeMap, unit: &gimli::Unit, index: gimli::DebugAddrIndex, -) -> Result +) -> Result, ProgramFromElfError> where R: gimli::Reader, { @@ -525,7 +648,7 @@ where offset, }; - fetch_relocation(relocations, relocation_target) + try_fetch_relocation(relocations, relocation_target) } else { Err(ProgramFromElfError::other("failed to process DWARF: missing '.debug_addr' section")) } @@ -632,18 +755,90 @@ where { offset: gimli::DebugInfoOffset, raw_unit: gimli::Unit, - low_pc: SectionTarget, + low_pc: Option, paths: Vec>, } -fn extract_lines(unit: &Unit) -> Result, ProgramFromElfError> +fn extract_lines( + section_index: SectionIndex, + relocations: &BTreeMap, + unit: &Unit>, +) -> Result, ProgramFromElfError> where R: gimli::Reader, { let mut lines = Vec::new(); - if let Some(program) = unit.raw_unit.line_program.clone() { - let mut iter = program.rows(); - while let Some((_, row)) = iter.next_row()? { + if let Some(mut program) = unit.raw_unit.line_program.clone() { + let mut row = gimli::LineRow::new(program.header()); + let mut iter = program.header().instructions(); + + let input = program.header().raw_program_buf(); + let mut target = None; + loop { + row.reset(program.header()); + let tracker = input.start_tracking(); + let Some(instruction) = iter.next_instruction(program.header())? else { + break; + }; + + match instruction { + LineInstruction::Special(..) + | LineInstruction::Copy + | LineInstruction::AdvanceLine(..) + | LineInstruction::SetFile(..) + | LineInstruction::SetColumn(..) + | LineInstruction::NegateStatement + | LineInstruction::SetBasicBlock + | LineInstruction::SetPrologueEnd + | LineInstruction::SetEpilogueBegin + | LineInstruction::SetIsa(..) + | LineInstruction::EndSequence + | LineInstruction::DefineFile(..) + | LineInstruction::SetDiscriminator(..) + | LineInstruction::UnknownStandard0(..) + | LineInstruction::UnknownStandard1(..) + | LineInstruction::UnknownStandardN(..) + | LineInstruction::UnknownExtended(..) => {} + + LineInstruction::AdvancePc(..) | LineInstruction::ConstAddPc => { + return Err(ProgramFromElfError::other( + "failed to process DWARF: unsupported line program instruction: {instruction:?}", + )); + } + + LineInstruction::SetAddress(..) => { + let relocation_target = SectionTarget { + section_index, + offset: *tracker.list().last().unwrap(), + }; + + target = try_fetch_relocation(relocations, relocation_target)?; + } + + LineInstruction::FixedAddPc(..) => { + let relocation_target = SectionTarget { + section_index, + offset: *tracker.list().last().unwrap(), + }; + + target = try_fetch_size_relocation(relocations, relocation_target)?.map(|(target_section_index, target_range)| { + SectionTarget { + section_index: target_section_index, + offset: target_range.end, + } + }); + } + } + + if !row.execute(instruction, &mut program) { + continue; + } + + let tombstone_address = !0 >> (64 - program.header().encoding().address_size * 8); + if row.address() == tombstone_address { + continue; + } + let Some(path) = unit.paths.get(row.file_index() as usize) else { return Err(ProgramFromElfError::other( "failed to process DWARF: out of bounds file index encountered when processing line programs", @@ -663,13 +858,6 @@ where }, }; - let offset = row - .address() - .checked_sub(unit.raw_unit.low_pc) - .expect("address underflow when parsing line program") - .checked_add(unit.low_pc.offset) - .expect("address overflow when parsing line program"); - struct Flags<'a>(&'a gimli::LineRow); impl<'a> core::fmt::Display for Flags<'a> { fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result { @@ -712,15 +900,38 @@ where } } - log::trace!("Line entry: 0x{:x} (0x{offset:x}) {location:?} {}", row.address(), Flags(row),); + let Some(target) = target else { + // Sometimes the entries seem to not have any relocation attached to them + // and have all zeros set by the compiler, e.g. I've seen this as the end + // of the line program: + // + // 0x0009b494 [ 197,26] NS + // 0x0009b49c [ 215,36] NS + // 0x0009b4a0 [ 215, 5] + // 0x0009b4a4 [ 215, 5] ET + // 0x00000000 [2046, 0] NS uri: "libs/libcxx/include/string" + // 0x00000000 [2047, 9] NS PE + // 0x00000000 [2047, 9] NS ET + // 0x00000000 [ 259, 0] NS uri: "libs/libcxx/include/stdexcept" + // 0x00000000 [ 263, 5] NS PE + // 0x00000000 [ 263, 5] NS ET + log::trace!("Line entry without a relocation: {row:?}"); + continue; + }; - let entry = LineEntry { offset, location }; + log::trace!( + "Line entry: 0x{:x} (0x{offset:x}) {location:?} {}", + row.address(), + Flags(&row), + offset = target.offset + ); + let entry = LineEntry { target, location }; lines.push(entry); } } // These should already be sorted, but sort them anyway. - lines.sort_by_key(|entry| entry.offset); + lines.sort_by_key(|entry| entry.target.offset); Ok(lines) } @@ -770,8 +981,8 @@ where { sections: Sections<'a>, relocations: &'a BTreeMap, - dwarf: &'a gimli::Dwarf, - units: &'a [Unit], + dwarf: &'a gimli::Dwarf>, + units: &'a [Unit>], depth: usize, inline_depth: usize, namespace_buffer: Vec, @@ -811,7 +1022,12 @@ where }); subprograms_for_unit.push(subprograms); - let lines = extract_lines(unit)?; + let lines = if let Some(debug_line) = self.sections.debug_line { + extract_lines(debug_line.index(), self.relocations, unit)? + } else { + Default::default() + }; + lines_for_unit.push(lines); } @@ -885,17 +1101,23 @@ where } let mut location_map: HashMap> = HashMap::new(); - for ((subprograms, lines), unit) in subprograms_for_unit - .into_iter() - .zip(lines_for_unit.into_iter()) - .zip(self.units.iter()) - { - let line_boundaries: Vec = lines.iter().map(|entry| entry.offset).collect(); + for (subprograms, lines) in subprograms_for_unit.into_iter().zip(lines_for_unit.into_iter()) { + if !lines + .iter() + .all(|entry| entry.target.section_index == lines[0].target.section_index) + { + return Err(ProgramFromElfError::other( + "failed to process DWARF: inconsistent target section in line program entries", + )); + } + + let line_boundaries: Vec = lines.iter().map(|entry| entry.target.offset).collect(); let line_ranges = line_boundaries.windows(2).map(|w| w[0]..w[1]); let line_range_map: RangeMap = line_ranges.zip(lines.into_iter()).collect(); for subprogram in subprograms { let source = subprogram.sources[0]; + let section_index = source.section_index; log::trace!(" Frame: {}", source); let mut map: LocationsForOffset = BTreeMap::new(); @@ -1057,10 +1279,7 @@ where } } - let target = SectionTarget { - section_index: unit.low_pc.section_index, - offset, - }; + let target = SectionTarget { section_index, offset }; if let Some((ref last_list, ref last_arc_list)) = last_emitted { if list == *last_list { @@ -1117,7 +1336,7 @@ where Ok(location_map) } - fn parse_tree(&mut self, unit: &Unit) -> Result>, ProgramFromElfError> { + fn parse_tree(&mut self, unit: &Unit>) -> Result>, ProgramFromElfError> { assert!(self.namespace_buffer.is_empty()); assert!(self.subprograms.is_empty()); assert_eq!(self.depth, 0); @@ -1139,7 +1358,11 @@ where } } - fn walk(&mut self, unit: &Unit, node: gimli::EntriesTreeNode) -> Result>, ProgramFromElfError> { + fn walk( + &mut self, + unit: &Unit>, + node: gimli::EntriesTreeNode>, + ) -> Result>, ProgramFromElfError> { let buffer_initial_length = self.namespace_buffer.len(); let node_entry = node.entry(); let Some(node_offset) = node_entry.offset().to_debug_info_offset(&unit.raw_unit.header) else { @@ -1412,9 +1635,7 @@ where })?; if current_inlined.is_empty() { - return Err(ProgramFromElfError::other( - "failed to process DWARF: inline subroutine with no source", - )); + log::trace!("Found inline subroutine with no source! (name = {name:?})"); } } _ => {} @@ -1597,7 +1818,7 @@ where #[derive(Clone, PartialEq, Eq)] struct LineEntry { - offset: u64, + target: SectionTarget, location: SourceCodeLocation, } @@ -1608,34 +1829,45 @@ pub(crate) struct DwarfInfo { pub location_map: HashMap>, } -#[derive(Clone)] -enum ValueOrOffset +struct AttributeValue where R: gimli::Reader, { - Value(gimli::AttributeValue), - Offset(R::Offset), + value: gimli::AttributeValue, + offset: Option, } -impl core::fmt::Debug for ValueOrOffset +impl Clone for AttributeValue where R: gimli::Reader, { - fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result { - match self { - Self::Value(value) => value.fmt(fmt), - Self::Offset(value) => write!(fmt, "Offset({value:?})"), + fn clone(&self) -> Self { + AttributeValue { + value: self.value.clone(), + offset: self.offset, } } } +impl core::fmt::Debug for AttributeValue +where + R: gimli::Reader, +{ + fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result { + fmt.debug_struct("AttributeValue") + .field("value", &self.value) + .field("offset", &self.offset) + .finish() + } +} + // This is mostly copied verbatim from gimli. fn parse_attribute( input_base: &R, input: &mut R, encoding: gimli::Encoding, attribute: gimli::AttributeSpecification, -) -> Result, ProgramFromElfError> +) -> Result, ProgramFromElfError> where R: gimli::Reader, { @@ -1694,8 +1926,11 @@ where } gimli::constants::DW_FORM_addr => { let offset = input.offset_from(input_base); - let _ = input.read_address(encoding.address_size)?; // AttributeValue::Addr - break Ok(ValueOrOffset::Offset(offset)); + let value = AttributeValue::Addr(input.read_address(encoding.address_size)?); + break Ok(self::AttributeValue { + value, + offset: Some(offset), + }); } gimli::constants::DW_FORM_block1 => { let block = length_u8_value(input)?; @@ -1902,7 +2137,7 @@ where } }; - break Ok(ValueOrOffset::Value(value)); + break Ok(self::AttributeValue { value, offset: None }); } } @@ -1910,7 +2145,7 @@ fn iter_attributes<'a, R>( dwarf: &gimli::Dwarf, unit: &'a gimli::Unit, entry_offset: gimli::UnitOffset, -) -> Result), ProgramFromElfError>> + 'a, ProgramFromElfError> +) -> Result), ProgramFromElfError>> + 'a, ProgramFromElfError> where R: gimli::Reader, { @@ -1956,24 +2191,52 @@ where let entry = cursor.current().ok_or(gimli::Error::MissingUnitDie)?; let entry_offset = entry.offset(); + log::trace!("Extracting low PC for unit at offset {entry_offset:?}..."); for pair in iter_attributes(dwarf, unit, entry_offset)? { let (name, value) = pair?; if name != gimli::constants::DW_AT_low_pc { continue; } - match value { - ValueOrOffset::Value(gimli::AttributeValue::DebugAddrIndex(gimli::DebugAddrIndex(index))) => { + AttributeValue { + value: gimli::AttributeValue::DebugAddrIndex(gimli::DebugAddrIndex(index)), + .. + } => { let index = gimli::DebugAddrIndex(index); - return Ok(Some(resolve_debug_addr_index(sections.debug_addr, relocations, unit, index)?)); + return resolve_debug_addr_index(sections.debug_addr, relocations, unit, index); } - ValueOrOffset::Offset(offset) => { + AttributeValue { + value: gimli::AttributeValue::Addr(address), + offset: Some(offset), + .. + } => { let relocation_target = SectionTarget { section_index: sections.debug_info.index(), offset: offset.into_u64(), }; - return fetch_relocation(relocations, relocation_target).map(Some); + let Some(relocation) = relocations.get(&relocation_target) else { + if address == 0 { + // Clang likes to emit these when compiling C++. + continue; + } + + return Err(ProgramFromElfError::other(format!( + "failed to process DWARF: failed to fetch DW_AT_low_pc for a unit: {relocation_target} has no relocation" + ))); + }; + + let RelocationKind::Abs { + target, + size: RelocationSize::U32, + } = relocation + else { + return Err(ProgramFromElfError::other(format!( + "failed to process DWARF: failed to fetch DW_AT_low_pc for a unit: unexpected relocation at {relocation_target}: {relocation:?}" + ))); + }; + + return Ok(Some(*target)); } value => { return Err(ProgramFromElfError::other(format!( @@ -2002,6 +2265,7 @@ pub(crate) fn load_dwarf( debug_addr: elf.section_by_name(".debug_addr"), debug_ranges: elf.section_by_name(".debug_ranges"), debug_rnglists: elf.section_by_name(".debug_rnglists"), + debug_line: elf.section_by_name(".debug_line"), }; let mut load_section = |id: gimli::SectionId| -> Result<_, ProgramFromElfError> { @@ -2012,10 +2276,12 @@ pub(crate) fn load_dwarf( }; let data: std::rc::Rc<[u8]> = data.into(); - Ok(gimli::read::EndianRcSlice::new(data, gimli::LittleEndian)) + let reader = gimli::read::EndianRcSlice::new(data, gimli::LittleEndian); + let reader = ReaderWrapper::wrap(reader); + Ok(reader) }; - let dwarf: gimli::Dwarf> = gimli::Dwarf::load(&mut load_section)?; + let dwarf: gimli::Dwarf>> = gimli::Dwarf::load(&mut load_section)?; let mut units = Vec::new(); { let mut iter = dwarf.units(); @@ -2043,10 +2309,8 @@ pub(crate) fn load_dwarf( } }; - let Some(low_pc) = extract_symbolic_low_pc(&dwarf, §ions, relocations, &unit)? else { - continue; - }; - + log::trace!("Processing unit: {offset:?}"); + let low_pc = extract_symbolic_low_pc(&dwarf, §ions, relocations, &unit)?; let paths = extract_paths(&dwarf, string_cache, &unit)?; units.push(Unit { low_pc, diff --git a/crates/polkavm-linker/src/lib.rs b/crates/polkavm-linker/src/lib.rs index f15c132d..bef749ad 100644 --- a/crates/polkavm-linker/src/lib.rs +++ b/crates/polkavm-linker/src/lib.rs @@ -6,6 +6,7 @@ mod dwarf; mod elf; mod fast_range_map; mod program_from_elf; +mod reader_wrapper; mod riscv; mod utils; diff --git a/crates/polkavm-linker/src/program_from_elf.rs b/crates/polkavm-linker/src/program_from_elf.rs index 53add4f4..6b70a6cb 100644 --- a/crates/polkavm-linker/src/program_from_elf.rs +++ b/crates/polkavm-linker/src/program_from_elf.rs @@ -2873,6 +2873,10 @@ pub fn program_from_elf(config: Config, data: &[u8]) -> Result +where + R: gimli::Reader, +{ + start: R, + is_enabled: bool, + list: Vec, +} + +pub struct ReaderTracker +where + R: gimli::Reader, +{ + tracker: Rc>>, +} + +impl ReaderTracker +where + R: gimli::Reader, +{ + pub fn list(&self) -> core::cell::Ref<[u64]> { + core::cell::Ref::map(self.tracker.borrow(), |tracker| tracker.list.as_slice()) + } +} + +impl Drop for ReaderTracker +where + R: gimli::Reader, +{ + fn drop(&mut self) { + self.tracker.borrow_mut().is_enabled = false; + } +} + +/// A wrapper over a `gimli::Reader` which allows us to track the offsets from which we're reading. +#[derive(Clone, Debug)] +pub struct ReaderWrapper +where + R: gimli::Reader, +{ + inner: R, + tracker: Rc>>, +} + +impl ReaderWrapper +where + R: gimli::Reader, +{ + pub fn wrap(reader: R) -> Self { + let tracker = Rc::new(RefCell::new(TrackerImpl { + start: reader.clone(), + is_enabled: false, + list: Vec::new(), + })); + ReaderWrapper { inner: reader, tracker } + } + + pub fn start_tracking(&self) -> ReaderTracker { + { + assert!(!self.tracker.borrow().is_enabled); + let mut tracker = self.tracker.borrow_mut(); + tracker.is_enabled = true; + tracker.list.clear(); + } + ReaderTracker { + tracker: self.tracker.clone(), + } + } + + fn track(&self) { + let mut tracker = self.tracker.borrow_mut(); + if !tracker.is_enabled { + return; + } + + let offset = self.inner.offset_from(&tracker.start); + tracker.list.push(offset.into_u64()); + } +} + +impl gimli::Reader for ReaderWrapper +where + R: gimli::Reader, +{ + type Endian = ::Endian; + type Offset = ::Offset; + + fn endian(&self) -> Self::Endian { + self.inner.endian() + } + + fn len(&self) -> Self::Offset { + self.inner.len() + } + + fn empty(&mut self) { + self.inner.empty() + } + + fn truncate(&mut self, len: Self::Offset) -> Result<()> { + self.inner.truncate(len) + } + + fn offset_from(&self, base: &Self) -> Self::Offset { + self.inner.offset_from(&base.inner) + } + + fn offset_id(&self) -> ReaderOffsetId { + self.inner.offset_id() + } + + fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option { + self.inner.lookup_offset_id(id) + } + + fn find(&self, byte: u8) -> Result { + self.inner.find(byte) + } + + fn skip(&mut self, len: Self::Offset) -> Result<()> { + self.inner.skip(len) + } + + fn split(&mut self, len: Self::Offset) -> Result { + Ok(Self { + inner: self.inner.split(len)?, + tracker: self.tracker.clone(), + }) + } + + fn to_slice(&self) -> Result> { + self.inner.to_slice() + } + + fn to_string(&self) -> Result> { + self.inner.to_string() + } + + fn to_string_lossy(&self) -> Result> { + self.inner.to_string_lossy() + } + + fn read_slice(&mut self, buf: &mut [u8]) -> Result<()> { + self.track(); + self.inner.read_slice(buf) + } + + fn read_u8_array(&mut self) -> Result + where + A: Sized + Default + AsMut<[u8]>, + { + self.track(); + self.inner.read_u8_array::() + } + + fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + fn read_u8(&mut self) -> Result { + self.track(); + self.inner.read_u8() + } + + fn read_i8(&mut self) -> Result { + self.track(); + self.inner.read_i8() + } + + fn read_u16(&mut self) -> Result { + self.track(); + self.inner.read_u16() + } + + fn read_i16(&mut self) -> Result { + self.track(); + self.inner.read_i16() + } + + fn read_u32(&mut self) -> Result { + self.track(); + self.inner.read_u32() + } + + fn read_i32(&mut self) -> Result { + self.track(); + self.inner.read_i32() + } + + fn read_u64(&mut self) -> Result { + self.track(); + self.inner.read_u64() + } + + fn read_i64(&mut self) -> Result { + self.track(); + self.inner.read_i64() + } + + fn read_f32(&mut self) -> Result { + self.track(); + self.inner.read_f32() + } + + fn read_f64(&mut self) -> Result { + self.track(); + self.inner.read_f64() + } + + fn read_uint(&mut self, n: usize) -> Result { + self.track(); + self.inner.read_uint(n) + } + + fn read_null_terminated_slice(&mut self) -> Result { + self.track(); + Ok(Self { + inner: self.inner.read_null_terminated_slice()?, + tracker: self.tracker.clone(), + }) + } + + fn skip_leb128(&mut self) -> Result<()> { + self.track(); + self.inner.skip_leb128() + } + + fn read_uleb128(&mut self) -> Result { + self.track(); + self.inner.read_uleb128() + } + + fn read_uleb128_u32(&mut self) -> Result { + self.track(); + self.inner.read_uleb128_u32() + } + + fn read_uleb128_u16(&mut self) -> Result { + self.track(); + self.inner.read_uleb128_u16() + } + + fn read_sleb128(&mut self) -> Result { + self.track(); + self.inner.read_sleb128() + } + + fn read_initial_length(&mut self) -> Result<(Self::Offset, Format)> { + self.track(); + self.inner.read_initial_length() + } + + fn read_address(&mut self, address_size: u8) -> Result { + self.track(); + self.inner.read_address(address_size) + } + + fn read_word(&mut self, format: Format) -> Result { + self.track(); + self.inner.read_word(format) + } + + fn read_length(&mut self, format: Format) -> Result { + self.track(); + self.inner.read_length(format) + } + + fn read_offset(&mut self, format: Format) -> Result { + self.track(); + self.inner.read_offset(format) + } + + fn read_sized_offset(&mut self, size: u8) -> Result { + self.track(); + self.inner.read_sized_offset(size) + } +} diff --git a/crates/polkavm-linux-raw/src/lib.rs b/crates/polkavm-linux-raw/src/lib.rs index 94f29ffc..20af2be8 100644 --- a/crates/polkavm-linux-raw/src/lib.rs +++ b/crates/polkavm-linux-raw/src/lib.rs @@ -51,11 +51,17 @@ use std::borrow::Cow; #[allow(non_camel_case_types)] type c_size_t = usize; +#[allow(non_camel_case_types)] +pub type size_t = c_size_t; + // Doesn't appear in public headers. pub const MNT_FORCE: u32 = 1; pub const MNT_DETACH: u32 = 2; pub const MNT_EXPIRE: u32 = 4; +pub const SIG_DFL: usize = 0; +pub const SIG_IGN: usize = 1; + pub use crate::arch_amd64_bindings::{ __NR_capset as SYS_capset, __NR_chdir as SYS_chdir, __NR_clock_gettime as SYS_clock_gettime, __NR_clone as SYS_clone, __NR_clone3 as SYS_clone3, __NR_close as SYS_close, __NR_close_range as SYS_close_range, __NR_dup3 as SYS_dup3, diff --git a/crates/polkavm-linux-raw/src/mmap.rs b/crates/polkavm-linux-raw/src/mmap.rs index 5972a52d..251fa85b 100644 --- a/crates/polkavm-linux-raw/src/mmap.rs +++ b/crates/polkavm-linux-raw/src/mmap.rs @@ -29,6 +29,7 @@ impl Mmap { } self.length = 0; + self.pointer = core::ptr::NonNull::::dangling().as_ptr(); } Ok(()) @@ -62,7 +63,7 @@ impl Mmap { impl Default for Mmap { fn default() -> Self { Self { - pointer: core::ptr::null_mut(), + pointer: core::ptr::NonNull::::dangling().as_ptr() as *mut c_void, length: 0, } } diff --git a/crates/polkavm-linux-sandbox/Cargo.toml b/crates/polkavm-linux-sandbox/Cargo.toml deleted file mode 100644 index b137a5ff..00000000 --- a/crates/polkavm-linux-sandbox/Cargo.toml +++ /dev/null @@ -1,18 +0,0 @@ -[package] -name = "polkavm-linux-sandbox" -version.workspace = true -authors.workspace = true -license.workspace = true -edition.workspace = true -rust-version.workspace = true -repository.workspace = true -description = "Sandbox implementation for PolkaVM (Linux-only)" - -[dependencies] -log = { workspace = true } -polkavm-common = { workspace = true, features = ["std"] } -polkavm-linux-raw = { workspace = true, features = ["std"] } - -[dev-dependencies] -env_logger = { workspace = true } -polkavm-assembler = { workspace = true } diff --git a/crates/polkavm-linux-sandbox/README.md b/crates/polkavm-linux-sandbox/README.md deleted file mode 100644 index 9033b0b6..00000000 --- a/crates/polkavm-linux-sandbox/README.md +++ /dev/null @@ -1,2 +0,0 @@ -This is an internal crate of PolkaVM; **do not** use it directly unless -you absolutely know what you're doing. diff --git a/crates/polkavm-zygote/build-and-replace-debug.sh b/crates/polkavm-zygote/build-and-replace-debug.sh index 632647a5..d5c2f5ce 100755 --- a/crates/polkavm-zygote/build-and-replace-debug.sh +++ b/crates/polkavm-zygote/build-and-replace-debug.sh @@ -3,4 +3,4 @@ set -euo pipefail cargo build -cp target/x86_64-unknown-linux-gnu/debug/polkavm-zygote ../polkavm-linux-sandbox/src/ +cp target/x86_64-unknown-linux-gnu/debug/polkavm-zygote ../polkavm/src/sandbox/ diff --git a/crates/polkavm-zygote/build-and-replace.sh b/crates/polkavm-zygote/build-and-replace.sh index ee3d8b39..1838a720 100755 --- a/crates/polkavm-zygote/build-and-replace.sh +++ b/crates/polkavm-zygote/build-and-replace.sh @@ -3,4 +3,4 @@ set -euo pipefail cargo build --release -cp target/x86_64-unknown-linux-gnu/release/polkavm-zygote ../polkavm-linux-sandbox/src/ +cp target/x86_64-unknown-linux-gnu/release/polkavm-zygote ../polkavm/src/sandbox/ diff --git a/crates/polkavm-zygote/src/main.rs b/crates/polkavm-zygote/src/main.rs index f8718f51..df2ca07c 100644 --- a/crates/polkavm-zygote/src/main.rs +++ b/crates/polkavm-zygote/src/main.rs @@ -13,7 +13,7 @@ use polkavm_common::{ VmCtx as VmCtxInner, SANDBOX_EMPTY_NATIVE_PROGRAM_COUNTER, SANDBOX_EMPTY_NTH_INSTRUCTION, VMCTX_FUTEX_BUSY, VMCTX_FUTEX_HOSTCALL, VMCTX_FUTEX_IDLE, VMCTX_FUTEX_INIT, VMCTX_FUTEX_TRAP, VM_ADDR_JUMP_TABLE, VM_ADDR_JUMP_TABLE_RETURN_TO_HOST, VM_ADDR_NATIVE_CODE, VM_ADDR_SIGSTACK, VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION, VM_RPC_FLAG_RECONFIGURE, VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION, - VM_RPC_FLAG_SIGSTOP_BEFORE_EXECUTION, VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE, VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE, + VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE, VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE, }, }; use polkavm_linux_raw as linux_raw; @@ -481,7 +481,6 @@ unsafe fn initialize(mut stack: *mut usize) -> linux_raw::Fd { (if a == linux_raw::SYS_write => jump @3), (if a == linux_raw::SYS_recvmsg => jump @2), (if a == linux_raw::SYS_rt_sigreturn => jump @1), - (if a == linux_raw::SYS_kill => jump @7), (seccomp_kill_thread), // SYS_recvmsg @@ -510,13 +509,6 @@ unsafe fn initialize(mut stack: *mut usize) -> linux_raw::Fd { (if a != linux_raw::PROT_EXEC => jump @0), (seccomp_allow), - // SYS_kill - ([7]: a = syscall_arg[0]), - (if a != 0 => jump @0), - (a = syscall_arg[1]), - (if a != linux_raw::SIGSTOP => jump @0), - (seccomp_allow), - ([0]: seccomp_kill_thread), ([1]: seccomp_allow), }; @@ -566,10 +558,6 @@ unsafe fn main_loop(socket: linux_raw::Fd) -> ! { reconfigure(socket.borrow()); } - if rpc_flags & VM_RPC_FLAG_SIGSTOP_BEFORE_EXECUTION != 0 { - send_sigstop_to_self(); - } - if let Some(rpc_address) = rpc_address { trace!("jumping to: 0x{:x}", rpc_address as usize); rpc_address(); @@ -583,10 +571,6 @@ unsafe fn main_loop(socket: linux_raw::Fd) -> ! { } } -fn send_sigstop_to_self() { - linux_raw::sys_kill(0, linux_raw::SIGSTOP).unwrap_or_else(|error| abort_with_error("failed to send SIGSTOP to itself", error)); -} - unsafe fn handle_flags_after_jump(rpc_flags: u32) { if rpc_flags & VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION != 0 { clear_program(); @@ -889,12 +873,15 @@ unsafe fn reconfigure(socket: linux_raw::FdRef) { #[inline(never)] unsafe fn clear_program() { let current = &mut *VMCTX.memory_config.get(); - let user_memory_size = current.user_memory_size(); - if user_memory_size > 0 { - linux_raw::sys_munmap(current.user_memory_address() as *mut core::ffi::c_void, user_memory_size as usize) - .unwrap_or_else(|error| abort_with_error("failed to unmap user memory", error)); + let user_memory_region_size = current.user_memory_region_size(); + if user_memory_region_size > 0 { + linux_raw::sys_munmap( + current.user_memory_region_address() as *mut core::ffi::c_void, + user_memory_region_size as usize, + ) + .unwrap_or_else(|error| abort_with_error("failed to unmap user memory", error)); - current.clear_user_memory_size(); + current.clear_user_memory_sizes(); } if current.stack_size() > 0 { diff --git a/crates/polkavm/Cargo.toml b/crates/polkavm/Cargo.toml index 1dcb370f..794954dc 100644 --- a/crates/polkavm/Cargo.toml +++ b/crates/polkavm/Cargo.toml @@ -15,8 +15,11 @@ log = { workspace = true } polkavm-assembler = { workspace = true } polkavm-common = { workspace = true, features = ["alloc", "logging"] } -[target.'cfg(all(target_arch = "x86_64", target_os = "linux", not(miri)))'.dependencies] -polkavm-linux-sandbox = { workspace = true } +[target.'cfg(all(not(miri), target_arch = "x86_64", target_os = "linux"))'.dependencies] +polkavm-linux-raw = { workspace = true, features = ["std"] } + +[target.'cfg(all(not(miri), target_arch = "x86_64", any(target_os = "linux", target_os = "macos", target_os = "freebsd")))'.dependencies] +libc = { workspace = true } [dev-dependencies] env_logger = { workspace = true } diff --git a/crates/polkavm/src/api.rs b/crates/polkavm/src/api.rs index 54159f75..00b6d027 100644 --- a/crates/polkavm/src/api.rs +++ b/crates/polkavm/src/api.rs @@ -14,12 +14,20 @@ use polkavm_common::program::{FrameKind, Opcode, RawInstruction, Reg}; use polkavm_common::utils::{Access, AsUninitSliceMut}; use crate::caller::{Caller, CallerRaw}; -use crate::compiler::{CompiledAccess, CompiledInstance, CompiledModule}; -use crate::config::{Backend, Config}; +use crate::config::{BackendKind, Config, SandboxKind}; use crate::error::{bail, Error, ExecutionError}; use crate::interpreter::{InterpretedAccess, InterpretedInstance, InterpretedModule}; use crate::tracer::Tracer; +if_compiler_is_supported! { + use crate::sandbox::Sandbox; + use crate::sandbox::generic::Sandbox as SandboxGeneric; + use crate::compiler::{CompiledInstance, CompiledModule}; + + #[cfg(target_os = "linux")] + use crate::sandbox::linux::Sandbox as SandboxLinux; +} + struct DisplayFn<'a, Args> { name: &'a str, args: Args, @@ -63,17 +71,50 @@ impl Engine { } } + if let Some(sandbox) = config.sandbox { + if !sandbox.is_supported() { + bail!("the '{sandbox}' backend is not supported on this platform") + } + } + #[allow(clippy::collapsible_if)] if !config.allow_insecure { if config.trace_execution { bail!("cannot enable trace execution: `set_allow_insecure`/`POLKAVM_ALLOW_INSECURE` is not enabled"); } + + if let Some(sandbox) = config.sandbox { + if matches!(sandbox, SandboxKind::Generic) { + bail!("cannot use the '{sandbox}' sandbox: this sandbox is not secure yet, and `set_allow_insecure`/`POLKAVM_ALLOW_INSECURE` is not enabled"); + } + } } Ok(Engine { config: config.clone() }) } } +if_compiler_is_supported! { + { + pub(crate) enum CompiledModuleKind { + #[cfg(target_os = "linux")] + Linux(CompiledModule), + Generic(CompiledModule), + Unavailable, + } + } else { + pub(crate) enum CompiledModuleKind { + Unavailable, + } + } +} + +impl CompiledModuleKind { + pub fn is_some(&self) -> bool { + !matches!(self, CompiledModuleKind::Unavailable) + } +} + struct ModulePrivate { debug_trace_execution: bool, exports: Vec>, @@ -83,11 +124,36 @@ struct ModulePrivate { jump_target_to_instruction: HashMap, blob: Option>, - compiled_module: Option, + compiled_module: CompiledModuleKind, interpreted_module: Option, memory_config: GuestMemoryConfig, } +if_compiler_is_supported! { + pub(crate) trait AsCompiledModule where S: Sandbox { + fn as_compiled_module(&self) -> Option<&CompiledModule>; + } + + #[cfg(target_os = "linux")] + impl AsCompiledModule for Module { + fn as_compiled_module(&self) -> Option<&CompiledModule> { + match self.0.compiled_module { + CompiledModuleKind::Linux(ref module) => Some(module), + _ => None + } + } + } + + impl AsCompiledModule for Module { + fn as_compiled_module(&self) -> Option<&CompiledModule> { + match self.0.compiled_module { + CompiledModuleKind::Generic(ref module) => Some(module), + _ => None + } + } + } +} + /// A compiled PolkaVM program module. #[derive(Clone)] pub struct Module(Arc); @@ -101,8 +167,8 @@ impl Module { &self.0.instructions } - pub(crate) fn compiled_module(&self) -> Option<&CompiledModule> { - self.0.compiled_module.as_ref() + pub(crate) fn compiled_module(&self) -> &CompiledModuleKind { + &self.0.compiled_module } pub(crate) fn interpreted_module(&self) -> Option<&InterpretedModule> { @@ -240,20 +306,51 @@ impl Module { .with_bss(blob.bss_size()) .with_stack(blob.stack_size()); - let default_backend = if Backend::Compiler.is_supported() { - Backend::Compiler + let default_backend = if BackendKind::Compiler.is_supported() && SandboxKind::Linux.is_supported() { + BackendKind::Compiler } else { - Backend::Interpreter + BackendKind::Interpreter }; + let selected_backend = engine.config.backend.unwrap_or(default_backend); - let compiler_enabled = selected_backend == Backend::Compiler; - let interpreter_enabled = debug_trace_execution || selected_backend == Backend::Interpreter; + let compiler_enabled = selected_backend == BackendKind::Compiler; + let interpreter_enabled = debug_trace_execution || selected_backend == BackendKind::Interpreter; let compiled_module = if compiler_enabled { - Some(CompiledModule::new(&instructions, &exports, init, debug_trace_execution)?) + if_compiler_is_supported! { + { + let default_sandbox = if SandboxKind::Linux.is_supported() { + SandboxKind::Linux + } else { + SandboxKind::Generic + }; + + let selected_sandbox = engine.config.sandbox.unwrap_or(default_sandbox); + match selected_sandbox { + SandboxKind::Linux => { + #[cfg(target_os = "linux")] + { + let module = CompiledModule::new(&instructions, &exports, init, debug_trace_execution)?; + CompiledModuleKind::Linux(module) + } + + #[cfg(not(target_os = "linux"))] + { + CompiledModuleKind::Unavailable + } + }, + SandboxKind::Generic => { + let module = CompiledModule::new(&instructions, &exports, init, debug_trace_execution)?; + CompiledModuleKind::Generic(module) + } + } + } else { + CompiledModuleKind::Unavailable + } + } } else { - None + CompiledModuleKind::Unavailable }; let interpreted_module = if interpreter_enabled { @@ -298,7 +395,7 @@ impl Module { export_index_by_name, jump_target_to_instruction, - blob: if debug_trace_execution || selected_backend == Backend::Interpreter { + blob: if debug_trace_execution || selected_backend == BackendKind::Interpreter { Some(blob.clone().into_owned()) } else { None @@ -1052,12 +1149,34 @@ impl Clone for InstancePre { impl InstancePre { /// Instantiates a new module. pub fn instantiate(&self) -> Result, Error> { - let backend = if self.0.module.0.compiled_module.is_some() { - let compiled_instance = CompiledInstance::new(self.0.module.clone())?; - InstanceBackend::Compiled(compiled_instance) - } else { - let interpreted_instance = InterpretedInstance::new(self.0.module.clone())?; - InstanceBackend::Interpreted(interpreted_instance) + let compiled_module = &self.0.module.0.compiled_module; + let backend = if_compiler_is_supported! { + { + match compiled_module { + #[cfg(target_os = "linux")] + CompiledModuleKind::Linux(..) => { + let compiled_instance = CompiledInstance::new(self.0.module.clone())?; + Some(InstanceBackend::CompiledLinux(compiled_instance)) + }, + CompiledModuleKind::Generic(..) => { + let compiled_instance = CompiledInstance::new(self.0.module.clone())?; + Some(InstanceBackend::CompiledGeneric(compiled_instance)) + }, + CompiledModuleKind::Unavailable => None + } + } else { + match compiled_module { + CompiledModuleKind::Unavailable => None + } + } + }; + + let backend = match backend { + Some(backend) => backend, + None => { + let interpreted_instance = InterpretedInstance::new(self.0.module.clone())?; + InstanceBackend::Interpreted(interpreted_instance) + } }; let tracer = if self.0.module.0.debug_trace_execution { @@ -1076,79 +1195,156 @@ impl InstancePre { } } -enum InstanceBackend { - Compiled(CompiledInstance), - Interpreted(InterpretedInstance), +if_compiler_is_supported! { + { + enum InstanceBackend { + #[cfg(target_os = "linux")] + CompiledLinux(CompiledInstance), + CompiledGeneric(CompiledInstance), + Interpreted(InterpretedInstance), + } + } else { + enum InstanceBackend { + Interpreted(InterpretedInstance), + } + } } impl InstanceBackend { fn call(&mut self, export_index: usize, on_hostcall: OnHostcall, config: &ExecutionConfig) -> Result<(), ExecutionError> { - match self { - InstanceBackend::Compiled(ref mut backend) => backend.call(export_index, on_hostcall, config), - InstanceBackend::Interpreted(ref mut backend) => backend.call(export_index, on_hostcall, config), + if_compiler_is_supported! { + { + match self { + #[cfg(target_os = "linux")] + InstanceBackend::CompiledLinux(ref mut backend) => backend.call(export_index, on_hostcall, config), + InstanceBackend::CompiledGeneric(ref mut backend) => backend.call(export_index, on_hostcall, config), + InstanceBackend::Interpreted(ref mut backend) => backend.call(export_index, on_hostcall, config), + } + } else { + match self { + InstanceBackend::Interpreted(ref mut backend) => backend.call(export_index, on_hostcall, config), + } + } } } fn access(&mut self) -> BackendAccess { - match self { - InstanceBackend::Compiled(ref mut backend) => BackendAccess::Compiled(backend.access()), - InstanceBackend::Interpreted(ref mut backend) => BackendAccess::Interpreted(backend.access()), + if_compiler_is_supported! { + { + match self { + #[cfg(target_os = "linux")] + InstanceBackend::CompiledLinux(ref mut backend) => BackendAccess::CompiledLinux(backend.access()), + InstanceBackend::CompiledGeneric(ref mut backend) => BackendAccess::CompiledGeneric(backend.access()), + InstanceBackend::Interpreted(ref mut backend) => BackendAccess::Interpreted(backend.access()), + } + } else { + match self { + InstanceBackend::Interpreted(ref mut backend) => BackendAccess::Interpreted(backend.access()), + } + } } } } -pub enum BackendAccess<'a> { - #[allow(dead_code)] - Compiled(CompiledAccess<'a>), - Interpreted(InterpretedAccess<'a>), +#[derive(Debug)] +pub struct MemoryAccessError { + pub address: u32, + pub length: u64, + pub error: T, +} + +impl core::fmt::Display for MemoryAccessError +where + T: core::fmt::Display, +{ + fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result { + write!( + fmt, + "out of range memory access in 0x{:x}-0x{:x} ({} bytes): {}", + self.address, + (self.address as u64) + self.length, + self.length, + self.error + ) + } +} + +fn map_access_error(error: MemoryAccessError) -> Trap +where + T: core::fmt::Display, +{ + log::warn!("{error}"); + Trap::default() +} + +if_compiler_is_supported! { + { + pub enum BackendAccess<'a> { + #[cfg(target_os = "linux")] + CompiledLinux(::Access<'a>), + CompiledGeneric(::Access<'a>), + Interpreted(InterpretedAccess<'a>), + } + } else { + pub enum BackendAccess<'a> { + Interpreted(InterpretedAccess<'a>), + } + } +} + +if_compiler_is_supported! { + { + macro_rules! access_backend { + ($itself:ident, |$access:ident| $e:expr) => { + match $itself { + #[cfg(target_os = "linux")] + BackendAccess::CompiledLinux($access) => $e, + BackendAccess::CompiledGeneric($access) => $e, + BackendAccess::Interpreted($access) => $e, + } + } + } + } else { + macro_rules! access_backend { + ($itself:ident, |$access:ident| $e:expr) => { + match $itself { + BackendAccess::Interpreted($access) => $e, + } + } + } + } } impl<'a> Access<'a> for BackendAccess<'a> { type Error = Trap; fn get_reg(&self, reg: Reg) -> u32 { - match self { - BackendAccess::Compiled(access) => access.get_reg(reg), - BackendAccess::Interpreted(access) => access.get_reg(reg), - } + access_backend!(self, |access| access.get_reg(reg)) } fn set_reg(&mut self, reg: Reg, value: u32) { - match self { - BackendAccess::Compiled(access) => access.set_reg(reg, value), - BackendAccess::Interpreted(access) => access.set_reg(reg, value), - } + access_backend!(self, |access| access.set_reg(reg, value)) } fn read_memory_into_slice<'slice, B>(&self, address: u32, buffer: &'slice mut B) -> Result<&'slice mut [u8], Self::Error> where B: ?Sized + AsUninitSliceMut, { - match self { - BackendAccess::Compiled(access) => Ok(access.read_memory_into_slice(address, buffer)?), - BackendAccess::Interpreted(access) => Ok(access.read_memory_into_slice(address, buffer)?), - } + access_backend!(self, |access| Ok(access + .read_memory_into_slice(address, buffer) + .map_err(map_access_error)?)) } fn write_memory(&mut self, address: u32, data: &[u8]) -> Result<(), Self::Error> { - match self { - BackendAccess::Compiled(access) => Ok(access.write_memory(address, data)?), - BackendAccess::Interpreted(access) => Ok(access.write_memory(address, data)?), - } + access_backend!(self, |access| Ok(access.write_memory(address, data).map_err(map_access_error)?)) } fn program_counter(&self) -> Option { - match self { - BackendAccess::Compiled(access) => access.program_counter(), - BackendAccess::Interpreted(access) => access.program_counter(), - } + access_backend!(self, |access| access.program_counter()) } fn native_program_counter(&self) -> Option { - match self { - BackendAccess::Compiled(access) => access.native_program_counter(), - BackendAccess::Interpreted(access) => access.native_program_counter(), - } + access_backend!(self, |access| access.native_program_counter()) } } @@ -1275,6 +1471,7 @@ impl Instance { pub struct ExecutionConfig { pub(crate) reset_memory_after_execution: bool, + pub(crate) clear_program_after_execution: bool, pub(crate) initial_regs: [u32; Reg::ALL_NON_ZERO.len()], } @@ -1286,6 +1483,7 @@ impl Default for ExecutionConfig { ExecutionConfig { reset_memory_after_execution: false, + clear_program_after_execution: false, initial_regs, } } @@ -1297,6 +1495,11 @@ impl ExecutionConfig { self } + pub fn set_clear_program_after_execution(&mut self, value: bool) -> &mut Self { + self.clear_program_after_execution = value; + self + } + pub fn set_reg(&mut self, reg: Reg, value: u32) -> &mut Self { if !matches!(reg, Reg::Zero) { self.initial_regs[reg as usize - 1] = value; diff --git a/crates/polkavm/src/compiler.rs b/crates/polkavm/src/compiler.rs index 37725b57..678749ed 100644 --- a/crates/polkavm/src/compiler.rs +++ b/crates/polkavm/src/compiler.rs @@ -3,17 +3,16 @@ use std::collections::HashMap; use polkavm_assembler::{Assembler, Label}; use polkavm_common::error::{ExecutionError, Trap}; use polkavm_common::init::GuestProgramInit; -use polkavm_common::program::{InstructionVisitor, Opcode, ProgramExport, RawInstruction, Reg}; -use polkavm_common::utils::{Access, AsUninitSliceMut}; +use polkavm_common::program::{InstructionVisitor, Opcode, ProgramExport, RawInstruction}; use polkavm_common::zygote::{ - VM_ADDR_JUMP_TABLE, VM_ADDR_NATIVE_CODE, VM_COMPILER_MAXIMUM_EPILOGUE_LENGTH, VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH, + VM_COMPILER_MAXIMUM_EPILOGUE_LENGTH, VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH, }; -use polkavm_linux_sandbox::{ExecuteArgs, Sandbox, SandboxAccess, SandboxConfig, SandboxProgram, SandboxProgramInit}; -use crate::api::{BackendAccess, ExecutionConfig, Module, OnHostcall}; +use crate::api::{BackendAccess, ExecutionConfig, Module, OnHostcall, AsCompiledModule}; use crate::error::{bail, Error}; -pub const IS_SUPPORTED: bool = true; +use crate::sandbox::{Sandbox, SandboxConfig, SandboxProgramInit, ExecuteArgs}; +use crate::config::SandboxKind; #[cfg(target_arch = "x86_64")] mod amd64; @@ -33,6 +32,9 @@ struct Compiler<'a> { ecall_label: Label, trap_label: Label, trace_label: Label, + jump_table_label: Label, + sandbox_kind: SandboxKind, + native_code_address: u64, /// Whether we're compiling a 64-bit program. Currently totally broken and mostly unimplemented. // TODO: Fix this. @@ -47,11 +49,18 @@ struct CompilationResult<'a> { } impl<'a> Compiler<'a> { - fn new(instructions: &'a [RawInstruction], exports: &'a [ProgramExport<'a>], debug_trace_execution: bool) -> Self { + fn new( + instructions: &'a [RawInstruction], + exports: &'a [ProgramExport<'a>], + sandbox_kind: SandboxKind, + debug_trace_execution: bool, + native_code_address: u64, + ) -> Self { let mut asm = Assembler::new(); let ecall_label = asm.forward_declare_label(); let trap_label = asm.forward_declare_label(); let trace_label = asm.forward_declare_label(); + let jump_table_label = asm.forward_declare_label(); Compiler { asm, @@ -67,6 +76,9 @@ impl<'a> Compiler<'a> { ecall_label, trap_label, trace_label, + jump_table_label, + sandbox_kind, + native_code_address, regs_are_64bit: false, debug_trace_execution, } @@ -74,7 +86,7 @@ impl<'a> Compiler<'a> { fn finalize(&mut self) -> Result { assert_eq!(self.asm.len(), 0); - self.asm.set_origin(VM_ADDR_NATIVE_CODE); + self.asm.set_origin(self.native_code_address); for nth_instruction in 0..self.instructions.len() { self.next_instruction = self.instructions.get(nth_instruction + 1).copied(); @@ -136,15 +148,20 @@ impl<'a> Compiler<'a> { for (pc, label) in self.pc_to_label.drain() { let pc = pc as usize; let range = pc * native_pointer_size..(pc + 1) * native_pointer_size; - let address = VM_ADDR_NATIVE_CODE + self.asm.get_label_offset(label) as u64; - log::trace!("Jump table: [0x{:x}] = 0x{:x}", VM_ADDR_JUMP_TABLE + range.start as u64, address); + let address = self.native_code_address + .checked_add_signed(self.asm.get_label_offset(label) as i64) + .expect("overflow"); + + log::trace!("Jump table: [0x{:x}] = 0x{:x}", self.native_code_address + range.start as u64, address); self.jump_table[range].copy_from_slice(&address.to_ne_bytes()); } self.export_trampolines.reserve(self.exports.len()); for export in self.exports { let label = self.export_to_label.get(&export.address()).unwrap(); - let native_address = VM_ADDR_NATIVE_CODE + self.asm.get_label_offset(*label) as u64; + let native_address = self.native_code_address + .checked_add_signed(self.asm.get_label_offset(*label) as i64) + .expect("overflow"); self.export_trampolines.push(native_address); } @@ -156,7 +173,20 @@ impl<'a> Compiler<'a> { epilogue_length ); - let sysreturn_address = VM_ADDR_NATIVE_CODE + self.asm.get_label_offset(label_sysreturn) as u64; + let sysreturn_address = self.native_code_address + .checked_add_signed(self.asm.get_label_offset(label_sysreturn) as i64) + .expect("overflow"); + + match self.sandbox_kind { + SandboxKind::Linux => {}, + SandboxKind::Generic => { + let native_page_size = crate::sandbox::get_native_page_size(); + let padded_length = polkavm_common::utils::align_to_next_page_usize(native_page_size, self.asm.len()).unwrap(); + self.asm.resize(padded_length, Self::PADDING_BYTE); + self.asm.define_label(self.jump_table_label); + } + } + let code = self.asm.finalize(); Ok(CompilationResult { code, @@ -194,19 +224,23 @@ impl<'a> Compiler<'a> { } } -pub struct CompiledModule { - sandbox_program: SandboxProgram, +pub struct CompiledModule where S: Sandbox { + sandbox_program: S::Program, export_trampolines: Vec, } -impl CompiledModule { +impl CompiledModule where S: Sandbox { pub fn new( instructions: &[RawInstruction], exports: &[ProgramExport], init: GuestProgramInit, debug_trace_execution: bool, ) -> Result { - let mut program_assembler = Compiler::new(instructions, exports, debug_trace_execution); + crate::sandbox::assert_native_page_size(); + + let address_space = S::reserve_address_space().map_err(Error::from_display)?; + let native_code_address = crate::sandbox::SandboxAddressSpace::native_code_address(&address_space); + let mut program_assembler = Compiler::new(instructions, exports, S::KIND, debug_trace_execution, native_code_address); let result = program_assembler.finalize()?; let init = SandboxProgramInit::new(init) @@ -214,7 +248,7 @@ impl CompiledModule { .with_jump_table(result.jump_table) .with_sysreturn_address(result.sysreturn_address); - let sandbox_program = SandboxProgram::new(init).map_err(Error::from_display)?; + let sandbox_program = S::prepare_program(init, address_space).map_err(Error::from_display)?; let export_trampolines = result.export_trampolines.to_owned(); Ok(CompiledModule { @@ -224,74 +258,25 @@ impl CompiledModule { } } -pub struct CompiledAccess<'a>(SandboxAccess<'a>); - -impl<'a> Access<'a> for CompiledAccess<'a> { - type Error = Trap; - - fn get_reg(&self, reg: Reg) -> u32 { - self.0.get_reg(reg) - } - - fn set_reg(&mut self, reg: Reg, value: u32) { - self.0.set_reg(reg, value) - } - - fn read_memory_into_slice<'slice, T>(&self, address: u32, buffer: &'slice mut T) -> Result<&'slice mut [u8], Self::Error> - where - T: ?Sized + AsUninitSliceMut, - { - let buffer = buffer.as_uninit_slice_mut(); - let length = buffer.len(); - self.0.read_memory_into_slice(address, buffer).map_err(|error| { - log::error!( - "Out of range read in 0x{:x}-0x{:x} ({} bytes): {error}", - address, - (address as u64 + length as u64) as u32, - length - ); - Trap::default() - }) - } - - fn write_memory(&mut self, address: u32, data: &[u8]) -> Result<(), Self::Error> { - self.0.write_memory(address, data).map_err(|error| { - log::error!( - "Out of range write in 0x{:x}-0x{:x} ({} bytes): {error}", - address, - (address as u64 + data.len() as u64) as u32, - data.len() - ); - Trap::default() - }) - } - - fn program_counter(&self) -> Option { - self.0.program_counter() - } - - fn native_program_counter(&self) -> Option { - self.0.native_program_counter() - } -} - -pub(crate) struct CompiledInstance { +pub(crate) struct CompiledInstance { module: Module, - sandbox: Sandbox, + sandbox: S, } -impl CompiledInstance { - pub fn new(module: Module) -> Result { +impl CompiledInstance where S: Sandbox, Module: AsCompiledModule { + pub fn new(module: Module) -> Result, Error> { let compiled_module = module - .compiled_module() + .as_compiled_module() .expect("internal error: tried to spawn a compiled instance without a compiled module"); - let mut sandbox_config = SandboxConfig::new(); - sandbox_config.enable_logger = cfg!(test) || module.is_debug_trace_execution_enabled(); + + let mut sandbox_config = S::Config::default(); + sandbox_config.enable_logger(cfg!(test) || module.is_debug_trace_execution_enabled()); // TODO: This is really slow as it will always spawn a new process from scratch. Cache this. - let mut sandbox = Sandbox::spawn(&sandbox_config) + let mut sandbox = S::spawn(&sandbox_config) .map_err(Error::from_display) .map_err(|error| error.context("instantiation failed: failed to create a sandbox"))?; + let mut args = ExecuteArgs::new(); args.set_program(&compiled_module.sandbox_program); sandbox @@ -305,19 +290,34 @@ impl CompiledInstance { pub fn call(&mut self, export_index: usize, on_hostcall: OnHostcall, config: &ExecutionConfig) -> Result<(), ExecutionError> { let compiled_module = self .module - .compiled_module() + .as_compiled_module() .expect("internal error: tried to call into a compiled instance without a compiled module"); + let address = compiled_module.export_trampolines[export_index]; - let mut exec_args = ExecuteArgs::new(); + let mut exec_args = ExecuteArgs::::new(); + if config.reset_memory_after_execution { exec_args.set_reset_memory_after_execution(); } + + if config.clear_program_after_execution { + exec_args.set_clear_program_after_execution(); + } + exec_args.set_call(address); exec_args.set_initial_regs(&config.initial_regs); - let mut on_hostcall = move |hostcall: u64, access: SandboxAccess| -> Result<(), Trap> { - on_hostcall(hostcall, BackendAccess::Compiled(CompiledAccess(access))) - }; + + fn wrap_on_hostcall(on_hostcall: OnHostcall<'_>) -> impl for <'r> FnMut(u64, S::Access<'r>) -> Result<(), Trap> + '_ where S: Sandbox { + move |hostcall, access| { + let access: BackendAccess = access.into(); + on_hostcall(hostcall, access) + } + } + + + let mut on_hostcall = wrap_on_hostcall(on_hostcall); exec_args.set_on_hostcall(&mut on_hostcall); + match self.sandbox.execute(exec_args) { Ok(()) => Ok(()), Err(ExecutionError::Error(error)) => Err(ExecutionError::Error(Error::from_display(error))), @@ -325,7 +325,7 @@ impl CompiledInstance { } } - pub fn access(&mut self) -> CompiledAccess { - CompiledAccess(self.sandbox.access()) + pub fn access(&'_ mut self) -> S::Access<'_> { + self.sandbox.access() } } diff --git a/crates/polkavm/src/compiler/amd64.rs b/crates/polkavm/src/compiler/amd64.rs index 6f2354b8..e4206575 100644 --- a/crates/polkavm/src/compiler/amd64.rs +++ b/crates/polkavm/src/compiler/amd64.rs @@ -6,15 +6,31 @@ use polkavm_assembler::Label; use polkavm_common::program::{InstructionVisitor, Reg}; use polkavm_common::zygote::{ - VmCtx, SYSCALL_HOSTCALL, SYSCALL_RETURN, SYSCALL_TRACE, SYSCALL_TRAP, VM_ADDR_JUMP_TABLE, VM_ADDR_SYSCALL, VM_ADDR_VMCTX, + VmCtx as LinuxVmCtx, + SYSCALL_HOSTCALL, SYSCALL_RETURN, SYSCALL_TRACE, SYSCALL_TRAP, VM_ADDR_JUMP_TABLE, VM_ADDR_SYSCALL, VM_ADDR_VMCTX, }; -use crate::compiler::Compiler; +use crate::compiler::{Compiler, SandboxKind}; +use crate::sandbox::generic::VmCtx as GenericVmCtx; use Reg::Zero as Z; +macro_rules! get_field_offset { + ($struct:expr, |$struct_ident:ident| $get_field:expr) => {{ + let $struct_ident = $struct; + let struct_ref = &$struct_ident; + let field_ref = $get_field; + let struct_addr = struct_ref as *const _ as usize; + let field_addr = field_ref as *const _ as usize; + field_addr - struct_addr + }} +} + const TMP_REG: NativeReg = rcx; +// The register used for the embedded sandbox to hold the base address of the guest's linear memory. +const GUEST_MEMORY_REG: NativeReg = r15; + const fn conv_reg(reg: Reg) -> NativeReg { match reg { Reg::Zero => unreachable!(), @@ -34,19 +50,6 @@ const fn conv_reg(reg: Reg) -> NativeReg { } } -fn regs_address() -> u64 { - let regs_offset: usize = { - let base = VmCtx::new(); - let base_ref = &base; - let field_ref = base.regs().get(); - let base_addr = base_ref as *const _ as usize; - let field_addr = field_ref as *const _ as usize; - field_addr - base_addr - }; - - VM_ADDR_VMCTX + regs_offset as u64 -} - enum Signedness { Signed, Unsigned, @@ -64,6 +67,8 @@ enum ShiftKind { } impl<'a> Compiler<'a> { + pub const PADDING_BYTE: u8 = 0x90; // NOP + fn reg_size(&self) -> RegSize { if !self.regs_are_64bit { RegSize::R32 @@ -80,60 +85,98 @@ impl<'a> Compiler<'a> { self.push(load32_imm(conv_reg(reg), imm)); } + fn embedded_load_store( + &mut self, + src_or_dst: Reg, + base: Reg, + offset: u32, + cb: impl FnOnce(&mut Self, NativeReg, NativeReg) + ) { + // TODO: This could be more efficient. + if base != Reg::Zero { + self.push(mov(RegSize::R32, TMP_REG, conv_reg(base))); + } else { + self.push(xor(RegSize::R32, TMP_REG, TMP_REG)); + } + + if offset != 0 { + self.push(add_imm(RegSize::R32, TMP_REG, offset as i32)); + } + + self.push(add(RegSize::R64, TMP_REG, GUEST_MEMORY_REG)); + if src_or_dst != Reg::Zero { + cb(self, TMP_REG, conv_reg(src_or_dst)); + } else { + self.push(push(GUEST_MEMORY_REG)); + self.push(xor(RegSize::R32, GUEST_MEMORY_REG, GUEST_MEMORY_REG)); + cb(self, TMP_REG, GUEST_MEMORY_REG); + self.push(pop(GUEST_MEMORY_REG)); + } + } + fn store(&mut self, src: Reg, base: Reg, offset: u32, kind: StoreKind) { if self.regs_are_64bit { todo!(); } - match (src, base, (offset as i32 >= 0)) { - // [address] = 0 - // (address is in the lower 2GB of the address space) - (Z, Z, true) => match kind { - StoreKind::U8 => self.push(store8_abs_imm(offset as i32, 0)), - StoreKind::U16 => self.push(store16_abs_imm(offset as i32, 0)), - StoreKind::U32 => self.push(store32_abs_imm(offset as i32, 0)), - StoreKind::U64 => { - self.push(xor(RegSize::R32, TMP_REG, TMP_REG)); - self.push(store_abs(offset as i32, TMP_REG, StoreKind::U64)); - } - }, + match self.sandbox_kind { + SandboxKind::Linux => { + match (src, base, (offset as i32 >= 0)) { + // [address] = 0 + // (address is in the lower 2GB of the address space) + (Z, Z, true) => match kind { + StoreKind::U8 => self.push(store8_abs_imm(offset as i32, 0)), + StoreKind::U16 => self.push(store16_abs_imm(offset as i32, 0)), + StoreKind::U32 => self.push(store32_abs_imm(offset as i32, 0)), + StoreKind::U64 => { + self.push(xor(RegSize::R32, TMP_REG, TMP_REG)); + self.push(store_abs(offset as i32, TMP_REG, StoreKind::U64)); + } + }, - // [address] = src - // (address is in the lower 2GB of the address space) - (_, Z, true) => { - self.push(store_abs(offset as i32, conv_reg(src), kind)); - } + // [address] = src + // (address is in the lower 2GB of the address space) + (_, Z, true) => { + self.push(store_abs(offset as i32, conv_reg(src), kind)); + } - // [address] = 0 - // (address is in the upper 2GB of the address space) - (Z, Z, false) => { - // The offset would get sign extended to full 64-bits if we'd use it - // in a displacement, so we need to do an indirect store here. - self.push(load32_imm(TMP_REG, offset)); - self.push(store32_indirect_imm(RegSize::R32, TMP_REG, 0, 0)); - } + // [address] = 0 + // (address is in the upper 2GB of the address space) + (Z, Z, false) => { + // The offset would get sign extended to full 64-bits if we'd use it + // in a displacement, so we need to do an indirect store here. + self.push(load32_imm(TMP_REG, offset)); + self.push(store32_indirect_imm(RegSize::R32, TMP_REG, 0, 0)); + } - // [address] = src - // (address is in the upper 2GB of the address space) - (_, Z, false) => { - self.push(load32_imm(TMP_REG, offset)); - self.push(store_indirect(RegSize::R32, TMP_REG, 0, conv_reg(src), kind)); - } + // [address] = src + // (address is in the upper 2GB of the address space) + (_, Z, false) => { + self.push(load32_imm(TMP_REG, offset)); + self.push(store_indirect(RegSize::R32, TMP_REG, 0, conv_reg(src), kind)); + } - // [base + offset] = 0 - (Z, _, _) => match kind { - StoreKind::U8 => self.push(store8_indirect_imm(RegSize::R32, conv_reg(base), offset as i32, 0)), - StoreKind::U16 => self.push(store16_indirect_imm(RegSize::R32, conv_reg(base), offset as i32, 0)), - StoreKind::U32 => self.push(store32_indirect_imm(RegSize::R32, conv_reg(base), offset as i32, 0)), - StoreKind::U64 => { - self.push(xor(RegSize::R32, TMP_REG, TMP_REG)); - self.push(store_indirect(RegSize::R32, conv_reg(base), offset as i32, TMP_REG, kind)); + // [base + offset] = 0 + (Z, _, _) => match kind { + StoreKind::U8 => self.push(store8_indirect_imm(RegSize::R32, conv_reg(base), offset as i32, 0)), + StoreKind::U16 => self.push(store16_indirect_imm(RegSize::R32, conv_reg(base), offset as i32, 0)), + StoreKind::U32 => self.push(store32_indirect_imm(RegSize::R32, conv_reg(base), offset as i32, 0)), + StoreKind::U64 => { + self.push(xor(RegSize::R32, TMP_REG, TMP_REG)); + self.push(store_indirect(RegSize::R32, conv_reg(base), offset as i32, TMP_REG, kind)); + } + }, + + // [base + offset] = src + (_, _, _) => { + self.push(store_indirect(RegSize::R32, conv_reg(base), offset as i32, conv_reg(src), kind)); + } } }, - - // [base + offset] = src - (_, _, _) => { - self.push(store_indirect(RegSize::R32, conv_reg(base), offset as i32, conv_reg(src), kind)); + SandboxKind::Generic => { + self.embedded_load_store(src, base, offset, move |itself, address_reg, value_reg| { + itself.push(store_indirect(RegSize::R64, address_reg, 0, value_reg, kind)); + }); } } } @@ -143,22 +186,31 @@ impl<'a> Compiler<'a> { todo!(); } - let dst_native = if dst == Reg::Zero { - // Do a dummy load. We can't just skip this since an invalid load can trigger a trap. - TMP_REG - } else { - conv_reg(dst) - }; + match self.sandbox_kind { + SandboxKind::Linux => { + let dst_native = if dst == Reg::Zero { + // Do a dummy load. We can't just skip this since an invalid load can trigger a trap. + TMP_REG + } else { + conv_reg(dst) + }; - if base == Reg::Zero { - if (offset as i32) < 0 { - self.push(load32_imm(TMP_REG, offset)); - self.push(load_indirect(dst_native, RegSize::R32, TMP_REG, 0, kind)); - } else { - self.push(load_abs(dst_native, offset as i32, kind)); + if base == Reg::Zero { + if (offset as i32) < 0 { + self.push(load32_imm(TMP_REG, offset)); + self.push(load_indirect(dst_native, RegSize::R32, TMP_REG, 0, kind)); + } else { + self.push(load_abs(dst_native, offset as i32, kind)); + } + } else { + self.push(load_indirect(dst_native, RegSize::R32, conv_reg(base), offset as i32, kind)); + } + }, + SandboxKind::Generic => { + self.embedded_load_store(dst, base, offset, move |itself, address_reg, value_reg| { + itself.push(load_indirect(value_reg, RegSize::R64, address_reg, 0, kind)); + }); } - } else { - self.push(load_indirect(dst_native, RegSize::R32, conv_reg(base), offset as i32, kind)); } } @@ -462,14 +514,41 @@ impl<'a> Compiler<'a> { } } + fn load_vmctx_field_address(&mut self, reg: NativeReg, offset: usize) { + match self.sandbox_kind { + SandboxKind::Linux => { + let address = VM_ADDR_VMCTX + offset as u64; + self.push(load64_imm(reg, address)); + }, + SandboxKind::Generic => { + let offset = crate::sandbox::generic::GUEST_MEMORY_TO_VMCTX_OFFSET as i32 + offset as i32; + self.push(lea(RegSize::R64, reg, RegSize::R64, GUEST_MEMORY_REG, offset)); + } + } + } + + fn load_regs_address(&mut self, reg: NativeReg) { + let regs_offset: usize = match self.sandbox_kind { + SandboxKind::Linux => { + get_field_offset!(LinuxVmCtx::new(), |base| base.regs().get()) + }, + SandboxKind::Generic => { + get_field_offset!(GenericVmCtx::new(), |base| base.regs()) + } + }; + + self.load_vmctx_field_address(reg, regs_offset); + } + fn save_registers_to_vmctx(&mut self) { if self.regs_are_64bit { todo!(); } - assert_eq!(Reg::ALL_NON_ZERO.len(), core::mem::size_of_val(VmCtx::new().regs()) / 4); + assert_eq!(Reg::ALL_NON_ZERO.len(), core::mem::size_of_val(LinuxVmCtx::new().regs()) / 4); + assert_eq!(Reg::ALL_NON_ZERO.len(), core::mem::size_of_val(GenericVmCtx::new().regs()) / 4); - self.push(load64_imm(TMP_REG, regs_address())); + self.load_regs_address(TMP_REG); for (nth, reg) in Reg::ALL_NON_ZERO.iter().copied().enumerate() { self.push(store_indirect(RegSize::R64, TMP_REG, nth as i32 * 4, conv_reg(reg), StoreKind::U32)); } @@ -480,7 +559,7 @@ impl<'a> Compiler<'a> { todo!(); } - self.push(load64_imm(TMP_REG, regs_address())); + self.load_regs_address(TMP_REG); for (nth, reg) in Reg::ALL_NON_ZERO.iter().copied().enumerate() { self.push(load_indirect(conv_reg(reg), RegSize::R64, TMP_REG, nth as i32 * 4, LoadKind::U32)); } @@ -512,9 +591,16 @@ impl<'a> Compiler<'a> { let label = self.asm.create_label(); self.save_registers_to_vmctx(); - self.push(load64_imm(TMP_REG, VM_ADDR_SYSCALL)); - self.push(load32_imm(rdi, SYSCALL_RETURN)); - self.push(jmp_reg(TMP_REG)); + match self.sandbox_kind { + SandboxKind::Linux => { + self.push(load64_imm(TMP_REG, VM_ADDR_SYSCALL)); + self.push(load32_imm(rdi, SYSCALL_RETURN)); + self.push(jmp_reg(TMP_REG)); + }, + SandboxKind::Generic => { + self.push(ret()); + } + } label } @@ -523,29 +609,59 @@ impl<'a> Compiler<'a> { log::trace!("Emitting trampoline: ecall"); self.define_label(self.ecall_label); - self.push(push(TMP_REG)); // Save the ecall number. - self.save_registers_to_vmctx(); - self.push(load64_imm(TMP_REG, VM_ADDR_SYSCALL)); - self.push(load32_imm(rdi, SYSCALL_HOSTCALL)); - self.push(pop(rsi)); // Pop the ecall number as an argument. - self.push(call_reg(TMP_REG)); - self.restore_registers_from_vmctx(); - self.push(ret()); + match self.sandbox_kind { + SandboxKind::Linux => { + self.push(push(TMP_REG)); // Save the ecall number. + self.save_registers_to_vmctx(); + self.push(load64_imm(TMP_REG, VM_ADDR_SYSCALL)); + self.push(load32_imm(rdi, SYSCALL_HOSTCALL)); + self.push(pop(rsi)); // Pop the ecall number as an argument. + self.push(call_reg(TMP_REG)); + self.restore_registers_from_vmctx(); + self.push(ret()); + }, + SandboxKind::Generic => { + let handler_address = crate::sandbox::generic::handle_ecall as usize as u64; + self.push(push(TMP_REG)); // Save the ecall number. + self.save_registers_to_vmctx(); + self.push(load64_imm(TMP_REG, handler_address)); + self.push(mov(RegSize::R64, rdi, GUEST_MEMORY_REG)); + self.push(pop(rsi)); // Pop the ecall number as an argument. + self.push(call_reg(TMP_REG)); + self.restore_registers_from_vmctx(); + self.push(ret()); + } + } } pub(crate) fn emit_trace_trampoline(&mut self) { log::trace!("Emitting trampoline: trace"); self.define_label(self.trace_label); - self.push(push(TMP_REG)); // Save the instruction number. - self.save_registers_to_vmctx(); - self.push(load64_imm(TMP_REG, VM_ADDR_SYSCALL)); - self.push(load32_imm(rdi, SYSCALL_TRACE)); - self.push(pop(rsi)); // Pop the instruction number as an argument. - self.push(load_indirect(rdx, RegSize::R64, rsp, -8, LoadKind::U64)); // Grab the return address. - self.push(call_reg(TMP_REG)); - self.restore_registers_from_vmctx(); - self.push(ret()); + match self.sandbox_kind { + SandboxKind::Linux => { + self.push(push(TMP_REG)); // Save the instruction number. + self.save_registers_to_vmctx(); + self.push(load64_imm(TMP_REG, VM_ADDR_SYSCALL)); + self.push(load32_imm(rdi, SYSCALL_TRACE)); + self.push(pop(rsi)); // Pop the instruction number as an argument. + self.push(load_indirect(rdx, RegSize::R64, rsp, -8, LoadKind::U64)); // Grab the return address. + self.push(call_reg(TMP_REG)); + self.restore_registers_from_vmctx(); + self.push(ret()); + }, + SandboxKind::Generic => { + let handler_address = crate::sandbox::generic::handle_trace as usize as u64; + self.push(push(TMP_REG)); // Save the instruction number. + self.save_registers_to_vmctx(); + self.push(load64_imm(TMP_REG, handler_address)); + self.push(mov(RegSize::R64, rdi, GUEST_MEMORY_REG)); + self.push(pop(rsi)); // Pop the instruction number as an argument. + self.push(call_reg(TMP_REG)); + self.restore_registers_from_vmctx(); + self.push(ret()); + } + } } pub(crate) fn emit_trap_trampoline(&mut self) { @@ -553,9 +669,16 @@ impl<'a> Compiler<'a> { self.define_label(self.trap_label); self.save_registers_to_vmctx(); - self.push(load64_imm(TMP_REG, VM_ADDR_SYSCALL)); - self.push(load32_imm(rdi, SYSCALL_TRAP)); - self.push(jmp_reg(TMP_REG)); + match self.sandbox_kind { + SandboxKind::Linux => { + self.push(load64_imm(TMP_REG, VM_ADDR_SYSCALL)); + self.push(load32_imm(rdi, SYSCALL_TRAP)); + self.push(jmp_reg(TMP_REG)); + }, + SandboxKind::Generic => { + self.push(ud2()); // TODO: FIXME + } + } } pub(crate) fn trace_execution(&mut self, nth_instruction: usize) { @@ -1140,18 +1263,36 @@ impl<'a> InstructionVisitor for Compiler<'a> { self.push(jmp_label32(label)); } else { - let offset = offset.wrapping_mul(4); + match self.sandbox_kind { + SandboxKind::Linux => { + // TODO: This could be more efficient. Maybe use fs/gs selector? + if offset == 0 { + self.push(mov(RegSize::R32, TMP_REG, conv_reg(base))); + } else { + let offset = offset.wrapping_mul(4); + self.push(lea(RegSize::R32, TMP_REG, RegSize::R32, conv_reg(base), offset as i32)); + } - // TODO: This could be more efficient. Maybe use fs/gs selector? - if offset == 0 { - self.push(mov(RegSize::R32, TMP_REG, conv_reg(base))); - } else { - self.push(lea(RegSize::R32, TMP_REG, RegSize::R32, conv_reg(base), offset as i32)); + self.push(ror_imm(RegSize::R32, TMP_REG, 2)); + self.push(shl_imm(RegSize::R64, TMP_REG, 3)); + self.push(bts(RegSize::R64, TMP_REG, VM_ADDR_JUMP_TABLE.trailing_zeros() as u8)); + self.push(load_indirect(TMP_REG, RegSize::R64, TMP_REG, 0, LoadKind::U64)); + }, + SandboxKind::Generic => { + // TODO: This also could be more efficient. + // TODO: FIXME: This is broken if the offset is unaligned! + self.push(lea_rip_label(TMP_REG, self.jump_table_label)); + self.push(push(conv_reg(base))); + self.push(shl_imm(RegSize::R64, conv_reg(base), 1)); + if offset > 0 { + let offset = offset.wrapping_mul(8); + self.push(add_imm(RegSize::R32, conv_reg(base), offset as i32)); + } + self.push(add(RegSize::R64, TMP_REG, conv_reg(base))); + self.push(pop(conv_reg(base))); + self.push(load_indirect(TMP_REG, RegSize::R64, TMP_REG, 0, LoadKind::U64)); + } } - self.push(ror_imm(RegSize::R32, TMP_REG, 2)); - self.push(shl_imm(RegSize::R64, TMP_REG, 3)); - self.push(bts(RegSize::R64, TMP_REG, VM_ADDR_JUMP_TABLE.trailing_zeros() as u8)); - self.push(load_indirect(TMP_REG, RegSize::R64, TMP_REG, 0, LoadKind::U64)); if ra != Reg::Zero { match self.next_instruction_jump_target() { diff --git a/crates/polkavm/src/compiler_dummy.rs b/crates/polkavm/src/compiler_dummy.rs deleted file mode 100644 index 2f288c5d..00000000 --- a/crates/polkavm/src/compiler_dummy.rs +++ /dev/null @@ -1,76 +0,0 @@ -use crate::api::{ExecutionConfig, Module, OnHostcall}; -use crate::error::Error; -use polkavm_common::error::{ExecutionError, Trap}; -use polkavm_common::init::GuestProgramInit; -use polkavm_common::program::{ProgramExport, RawInstruction, Reg}; -use polkavm_common::utils::{Access, AsUninitSliceMut}; - -pub const IS_SUPPORTED: bool = false; - -pub struct CompiledModule { - _dummy: (), -} - -impl CompiledModule { - pub fn new( - _instructions: &[RawInstruction], - _exports: &[ProgramExport], - _init: GuestProgramInit, - _debug_trace_execution: bool, - ) -> Result { - unreachable!("the compiler is not supported on this platform") - } -} - -pub struct CompiledAccess<'a> { - _dummy: &'a (), -} - -impl<'a> Access<'a> for CompiledAccess<'a> { - type Error = Trap; - - fn get_reg(&self, _reg: Reg) -> u32 { - unimplemented!(); - } - - fn set_reg(&mut self, _reg: Reg, _value: u32) { - unimplemented!(); - } - - fn read_memory_into_slice<'slice, T>(&self, _address: u32, _buffer: &'slice mut T) -> Result<&'slice mut [u8], Self::Error> - where - T: ?Sized + AsUninitSliceMut, - { - unimplemented!(); - } - - fn write_memory(&mut self, _address: u32, _data: &[u8]) -> Result<(), Self::Error> { - unimplemented!(); - } - - fn program_counter(&self) -> Option { - unimplemented!(); - } - - fn native_program_counter(&self) -> Option { - unimplemented!(); - } -} - -pub(crate) struct CompiledInstance { - _dummy: (), -} - -impl CompiledInstance { - pub fn new(_: Module) -> Result { - unimplemented!(); - } - - pub fn call(&mut self, _export_index: usize, _on_hostcall: OnHostcall, _config: &ExecutionConfig) -> Result<(), ExecutionError> { - unimplemented!(); - } - - pub fn access(&mut self) -> CompiledAccess { - unimplemented!() - } -} diff --git a/crates/polkavm/src/config.rs b/crates/polkavm/src/config.rs index 4dade944..cba201bc 100644 --- a/crates/polkavm/src/config.rs +++ b/crates/polkavm/src/config.rs @@ -1,30 +1,30 @@ use crate::error::{bail, Error}; #[derive(Copy, Clone, PartialEq, Eq, Debug)] -pub enum Backend { +pub enum BackendKind { Compiler, Interpreter, } -impl core::fmt::Display for Backend { +impl core::fmt::Display for BackendKind { fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result { let name = match self { - Backend::Compiler => "compiler", - Backend::Interpreter => "interpreter", + BackendKind::Compiler => "compiler", + BackendKind::Interpreter => "interpreter", }; fmt.write_str(name) } } -impl Backend { - fn from_os_str(s: &std::ffi::OsStr) -> Result, Error> { +impl BackendKind { + fn from_os_str(s: &std::ffi::OsStr) -> Result, Error> { if s == "auto" { Ok(None) } else if s == "interpreter" { - Ok(Some(Backend::Interpreter)) + Ok(Some(BackendKind::Interpreter)) } else if s == "compiler" { - Ok(Some(Backend::Compiler)) + Ok(Some(BackendKind::Compiler)) } else { Err(Error::from_static_str( "invalid value of POLKAVM_BACKEND; supported values are: 'interpreter', 'compiler'", @@ -33,18 +33,69 @@ impl Backend { } } -impl Backend { +impl BackendKind { pub fn is_supported(self) -> bool { match self { - Backend::Interpreter => true, - Backend::Compiler => crate::compiler::IS_SUPPORTED, + BackendKind::Interpreter => true, + BackendKind::Compiler => if_compiler_is_supported! { + { true } else { false } + }, + } + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum SandboxKind { + Linux, + Generic, +} + +impl core::fmt::Display for SandboxKind { + fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result { + let name = match self { + SandboxKind::Linux => "linux", + SandboxKind::Generic => "generic", + }; + + fmt.write_str(name) + } +} + +impl SandboxKind { + fn from_os_str(s: &std::ffi::OsStr) -> Result, Error> { + if s == "auto" { + Ok(None) + } else if s == "linux" { + Ok(Some(SandboxKind::Linux)) + } else if s == "generic" { + Ok(Some(SandboxKind::Generic)) + } else { + Err(Error::from_static_str( + "invalid value of POLKAVM_SANDBOX; supported values are: 'linux', 'generic'", + )) + } + } +} + +impl SandboxKind { + pub fn is_supported(self) -> bool { + if_compiler_is_supported! { + { + match self { + SandboxKind::Linux => cfg!(target_os = "linux"), + SandboxKind::Generic => true + } + } else { + false + } } } } #[derive(Clone)] pub struct Config { - pub(crate) backend: Option, + pub(crate) backend: Option, + pub(crate) sandbox: Option, pub(crate) trace_execution: bool, pub(crate) allow_insecure: bool, } @@ -74,6 +125,7 @@ impl Config { pub fn new() -> Self { Config { backend: None, + sandbox: None, trace_execution: false, allow_insecure: false, } @@ -83,7 +135,11 @@ impl Config { pub fn from_env() -> Result { let mut config = Self::new(); if let Some(value) = std::env::var_os("POLKAVM_BACKEND") { - config.backend = Backend::from_os_str(&value)?; + config.backend = BackendKind::from_os_str(&value)?; + } + + if let Some(value) = std::env::var_os("POLKAVM_SANDBOX") { + config.sandbox = SandboxKind::from_os_str(&value)?; } if let Some(value) = env_bool("POLKAVM_TRACE_EXECUTION")? { @@ -102,11 +158,21 @@ impl Config { /// Default: `None` (automatically pick the best available backend) /// /// Corresponding environment variable: `POLKAVM_BACKEND` (`auto`, `compiler`, `interpreter`) - pub fn set_backend(&mut self, backend: Option) -> &mut Self { + pub fn set_backend(&mut self, backend: Option) -> &mut Self { self.backend = backend; self } + /// Forces the use of a given sandbox. + /// + /// Default: `None` (automatically pick the best available sandbox) + /// + /// Corresponding environment variable: `POLKAVM_SANDBOX` (`auto`, `linux`, `generic`) + pub fn set_sandbox(&mut self, sandbox: Option) -> &mut Self { + self.sandbox = sandbox; + self + } + /// Enables execution tracing. /// /// **Requires `set_allow_insecure` to be `true`.** diff --git a/crates/polkavm/src/interpreter.rs b/crates/polkavm/src/interpreter.rs index fb89a925..d15ebbac 100644 --- a/crates/polkavm/src/interpreter.rs +++ b/crates/polkavm/src/interpreter.rs @@ -1,4 +1,4 @@ -use crate::api::{BackendAccess, ExecutionConfig, Module, OnHostcall}; +use crate::api::{BackendAccess, ExecutionConfig, MemoryAccessError, Module, OnHostcall}; use crate::error::{bail, Error}; use core::mem::MaybeUninit; use polkavm_common::abi::VM_ADDR_RETURN_TO_HOST; @@ -211,7 +211,7 @@ pub struct InterpretedAccess<'a> { } impl<'a> Access<'a> for InterpretedAccess<'a> { - type Error = Trap; + type Error = MemoryAccessError<&'static str>; fn get_reg(&self, reg: Reg) -> u32 { if reg == Reg::Zero { @@ -235,13 +235,11 @@ impl<'a> Access<'a> for InterpretedAccess<'a> { { let buffer: &mut [MaybeUninit] = buffer.as_uninit_slice_mut(); let Some(slice) = self.instance.get_memory_slice(address, buffer.len() as u32) else { - log::error!( - "Out of range read in 0x{:x}-0x{:x} ({} bytes)", + return Err(MemoryAccessError { address, - (address as u64 + buffer.len() as u64) as u32, - buffer.len() - ); - return Err(Trap::default()); + length: buffer.len() as u64, + error: "out of range read", + }); }; Ok(byte_slice_init(buffer, slice)) @@ -249,13 +247,11 @@ impl<'a> Access<'a> for InterpretedAccess<'a> { fn write_memory(&mut self, address: u32, data: &[u8]) -> Result<(), Self::Error> { let Some(slice) = self.instance.get_memory_slice_mut(address, data.len() as u32) else { - log::error!( - "Out of range write in 0x{:x}-0x{:x} ({} bytes)", + return Err(MemoryAccessError { address, - (address as u64 + data.len() as u64) as u32, - data.len() - ); - return Err(Trap::default()); + length: data.len() as u64, + error: "out of range write", + }); }; slice.copy_from_slice(data); diff --git a/crates/polkavm/src/lib.rs b/crates/polkavm/src/lib.rs index 4e7d2e74..c1b31139 100644 --- a/crates/polkavm/src/lib.rs +++ b/crates/polkavm/src/lib.rs @@ -1,6 +1,42 @@ #![forbid(unused_must_use)] #![forbid(clippy::missing_safety_doc)] -#![forbid(clippy::undocumented_unsafe_blocks)] +#![deny(clippy::undocumented_unsafe_blocks)] + +#[cfg(all( + not(miri), + target_arch = "x86_64", + any(target_os = "linux", target_os = "macos", target_os = "freebsd") +))] +macro_rules! if_compiler_is_supported { + ({ + $($if_true:tt)* + } else { + $($if_false:tt)* + }) => { + $($if_true)* + }; + + ($($if_true:tt)*) => { + $($if_true)* + } +} + +#[cfg(not(all( + not(miri), + target_arch = "x86_64", + any(target_os = "linux", target_os = "macos", target_os = "freebsd") +)))] +macro_rules! if_compiler_is_supported { + ({ + $($if_true:tt)* + } else { + $($if_false:tt)* + }) => { + $($if_false)* + }; + + ($($if_true:tt)*) => {} +} mod error; @@ -11,14 +47,10 @@ mod interpreter; mod source_cache; mod tracer; -#[cfg(all(target_arch = "x86_64", target_os = "linux", not(miri)))] -mod compiler; - -#[cfg(not(all(target_arch = "x86_64", target_os = "linux", not(miri))))] -mod compiler_dummy; - -#[cfg(not(all(target_arch = "x86_64", target_os = "linux", not(miri))))] -use compiler_dummy as compiler; +if_compiler_is_supported! { + mod compiler; + mod sandbox; +} pub use polkavm_common::{ error::{ExecutionError, Trap}, @@ -28,7 +60,7 @@ pub use polkavm_common::{ pub use crate::api::{Engine, Func, FuncType, Instance, InstancePre, IntoExternFn, Linker, Module, TypedFunc, Val, ValType}; pub use crate::caller::{Caller, CallerRef}; -pub use crate::config::Config; +pub use crate::config::{BackendKind, Config, SandboxKind}; pub use crate::error::Error; #[cfg(test)] diff --git a/crates/polkavm/src/sandbox.rs b/crates/polkavm/src/sandbox.rs new file mode 100644 index 00000000..f4d9c8d6 --- /dev/null +++ b/crates/polkavm/src/sandbox.rs @@ -0,0 +1,444 @@ +use polkavm_common::{ + abi::VM_PAGE_SIZE, + error::{ExecutionError, Trap}, + init::GuestProgramInit, + program::Reg, + zygote::{ + SandboxMemoryConfig, + VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION, + VM_RPC_FLAG_RECONFIGURE, VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION, + }, + utils::Access +}; + +use crate::api::BackendAccess; +use crate::config::SandboxKind; + +pub mod generic; + +#[cfg(target_os = "linux")] +pub mod linux; + +// This is literally the only thing we need from `libc` on Linux, so instead of including +// the whole crate let's just define these ourselves. +#[cfg(target_os = "linux")] +const _SC_PAGESIZE: core::ffi::c_int = 30; + +#[cfg(target_os = "linux")] +extern "C" { + fn sysconf(name: core::ffi::c_int) -> core::ffi::c_long; +} + +#[cfg(not(target_os = "linux"))] +use libc::{sysconf, _SC_PAGESIZE}; + +pub(crate) fn get_native_page_size() -> usize { + // TODO: Cache this? + + // SAFETY: This function has no safety invariants and should be always safe to call. + unsafe { sysconf(_SC_PAGESIZE) as usize } +} + +pub(crate) fn assert_native_page_size() { + let native_page_size = get_native_page_size(); + assert!( + native_page_size <= VM_PAGE_SIZE as usize && VM_PAGE_SIZE as usize % native_page_size == 0, + "unsupported native page size: {}", + native_page_size + ); +} + +pub trait SandboxConfig: Default { + fn enable_logger(&mut self, value: bool); +} + +pub trait SandboxAddressSpace { + fn native_code_address(&self) -> u64; +} + +pub trait Sandbox: Sized { + const KIND: SandboxKind; + + type Access<'r>: Access<'r> + Into> where Self: 'r; + type Config: SandboxConfig; + type Error: core::fmt::Debug + core::fmt::Display; + type Program; + type AddressSpace: SandboxAddressSpace; + + fn reserve_address_space() -> Result; + fn prepare_program(init: SandboxProgramInit, address_space: Self::AddressSpace) -> Result; + fn spawn(config: &Self::Config) -> Result; + fn execute(&mut self, args: ExecuteArgs) -> Result<(), ExecutionError>; + fn access(&'_ mut self) -> Self::Access<'_>; +} + +pub type OnHostcall<'a, T> = &'a mut dyn for<'r> FnMut(u64, ::Access<'r>) -> Result<(), Trap>; + +#[derive(Copy, Clone)] +pub struct SandboxProgramInit<'a> { + guest_init: GuestProgramInit<'a>, + code: &'a [u8], + jump_table: &'a [u8], + sysreturn_address: u64, +} + +impl<'a> Default for SandboxProgramInit<'a> { + fn default() -> Self { + Self::new(Default::default()) + } +} + +impl<'a> core::ops::Deref for SandboxProgramInit<'a> { + type Target = GuestProgramInit<'a>; + fn deref(&self) -> &Self::Target { + &self.guest_init + } +} + +impl<'a> SandboxProgramInit<'a> { + pub fn new(guest_init: GuestProgramInit<'a>) -> Self { + Self { + guest_init, + code: &[], + jump_table: &[], + sysreturn_address: 0, + } + } + + pub fn with_code(mut self, code: &'a [u8]) -> Self { + self.code = code; + self + } + + pub fn with_jump_table(mut self, jump_table: &'a [u8]) -> Self { + self.jump_table = jump_table; + self + } + + pub fn with_sysreturn_address(mut self, address: u64) -> Self { + self.sysreturn_address = address; + self + } + + fn memory_config(&self, native_page_size: usize) -> Result { + let mut config = SandboxMemoryConfig::empty(); + config.set_guest_config(self.guest_init.memory_config()?); + config.set_code_size(native_page_size, self.code.len())?; + config.set_jump_table_size(native_page_size, self.jump_table.len())?; + + Ok(config) + } +} + +pub struct ExecuteArgs<'a, T> where T: Sandbox + 'a { + rpc_address: u64, + rpc_flags: u32, + program: Option<&'a T::Program>, + on_hostcall: Option>, + initial_regs: &'a [u32], +} + +impl<'a, T> Default for ExecuteArgs<'a, T> where T: Sandbox { + fn default() -> Self { + Self::new() + } +} + +impl<'a, T> ExecuteArgs<'a, T> where T: Sandbox { + #[inline] + pub fn new() -> Self { + static EMPTY_REGS: &[u32; Reg::ALL_NON_ZERO.len()] = &[0; Reg::ALL_NON_ZERO.len()]; + ExecuteArgs { + rpc_address: 0, + rpc_flags: 0, + program: None, + on_hostcall: None, + initial_regs: EMPTY_REGS, + } + } + + #[inline] + pub fn set_program(&mut self, program: &'a T::Program) { + self.rpc_flags |= VM_RPC_FLAG_RECONFIGURE; + self.program = Some(program); + } + + #[inline] + pub fn set_reset_memory_after_execution(&mut self) { + self.rpc_flags |= VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION; + } + + #[inline] + pub fn set_clear_program_after_execution(&mut self) { + self.rpc_flags |= VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION; + } + + #[inline] + pub fn set_call(&mut self, address: u64) { + self.rpc_address = address; + } + + #[inline] + pub fn set_on_hostcall(&mut self, callback: OnHostcall<'a, T>) { + self.on_hostcall = Some(callback); + } + + #[inline] + pub fn set_initial_regs(&mut self, regs: &'a [u32]) { + assert_eq!(regs.len(), Reg::ALL_NON_ZERO.len()); + self.initial_regs = regs; + } +} + +#[cfg(test)] +macro_rules! sandbox_tests { + ($sandbox_kind:ident) => { + mod $sandbox_kind { + use crate::sandbox::Sandbox as _; + use crate::sandbox::SandboxConfig as _; + use crate::sandbox::SandboxAddressSpace as _; + use crate::sandbox::{SandboxKind, SandboxProgramInit, ExecuteArgs, get_native_page_size}; + use polkavm_assembler::amd64::inst::*; + use polkavm_assembler::amd64::Reg::*; + use polkavm_assembler::amd64::{LoadKind, RegSize, StoreKind}; + use polkavm_assembler::Assembler; + use polkavm_common::init::GuestProgramInit; + use polkavm_common::utils::Access; + use polkavm_common::error::ExecutionError; + + use crate::sandbox::$sandbox_kind::{Sandbox, SandboxConfig}; + + #[test] + fn basic_execution_works() { + let _ = env_logger::try_init(); + + let init = GuestProgramInit::new().with_ro_data(&[0xaa, 0xbb]).with_bss(1); + let init = SandboxProgramInit::new(init); + + let mem = init.memory_config(get_native_page_size()).unwrap(); + let mut asm = Assembler::new(); + if Sandbox::KIND != SandboxKind::Generic { + asm.push(load32_imm(r15, 0)); + } + + asm + .push(load_indirect(rax, RegSize::R64, r15, mem.ro_data_address().try_into().unwrap(), LoadKind::U32)) + .push(store_indirect(RegSize::R64, r15, i32::try_from(mem.rw_data_address()).unwrap(), rax, StoreKind::U8)) + .push(store_indirect(RegSize::R64, r15, i32::try_from(mem.rw_data_address()).unwrap() + 4, rax, StoreKind::U16)) + .push(ret()); + + let code = asm.finalize(); + let address_space = Sandbox::reserve_address_space().unwrap(); + let native_code_address = address_space.native_code_address(); + let program = Sandbox::prepare_program(init.with_code(code), address_space).unwrap(); + let mut args = ExecuteArgs::new(); + args.set_program(&program); + args.set_call(native_code_address); + + let mut config = SandboxConfig::default(); + config.enable_logger(true); + + let mut sandbox = Sandbox::spawn(&config).unwrap(); + sandbox.execute(args).unwrap(); + + assert_eq!( + sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 8).unwrap(), + [0xaa, 0x00, 0x00, 0x00, 0xaa, 0xbb, 0x00, 0x00,] + ); + } + + #[test] + fn program_memory_can_be_reused_and_cleared() { + let _ = env_logger::try_init(); + + let init = GuestProgramInit::new().with_bss(1); + let init = SandboxProgramInit::new(init); + let mem = init.memory_config(get_native_page_size()).unwrap(); + let mut asm = Assembler::new(); + if Sandbox::KIND != SandboxKind::Generic { + asm.push(load32_imm(r15, 0)); + } + + asm + .push(load_indirect(rax, RegSize::R64, r15, mem.rw_data_address().try_into().unwrap(), LoadKind::U32)) + .push(add_imm(RegSize::R64, rax, 1)) + .push(store_indirect(RegSize::R64, r15, i32::try_from(mem.rw_data_address()).unwrap(), rax, StoreKind::U32)) + .push(ret()); + + let code = asm.finalize(); + let address_space = Sandbox::reserve_address_space().unwrap(); + let native_code_address = address_space.native_code_address(); + let program = Sandbox::prepare_program(init.with_code(code), address_space).unwrap(); + + let mut sandbox = Sandbox::spawn(&Default::default()).unwrap(); + assert!(sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).is_err()); + + { + let mut args = ExecuteArgs::new(); + args.set_program(&program); + sandbox.execute(args).unwrap(); + assert_eq!( + sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), + [0x00, 0x00, 0x00, 0x00] + ); + } + + { + let mut args = ExecuteArgs::new(); + args.set_call(native_code_address); + sandbox.execute(args).unwrap(); + assert_eq!( + sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), + [0x01, 0x00, 0x00, 0x00] + ); + } + + { + let mut args = ExecuteArgs::new(); + args.set_call(native_code_address); + sandbox.execute(args).unwrap(); + assert_eq!( + sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), + [0x02, 0x00, 0x00, 0x00] + ); + } + + { + let mut args = ExecuteArgs::new(); + args.set_call(native_code_address); + args.set_reset_memory_after_execution(); + sandbox.execute(args).unwrap(); + assert_eq!( + sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), + [0x00, 0x00, 0x00, 0x00] + ); + } + + { + let mut args = ExecuteArgs::new(); + args.set_call(native_code_address); + sandbox.execute(args).unwrap(); + assert_eq!( + sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), + [0x01, 0x00, 0x00, 0x00] + ); + } + + { + let mut args = ExecuteArgs::new(); + args.set_clear_program_after_execution(); + sandbox.execute(args).unwrap(); + assert!(sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).is_err()); + } + } + + #[test] + fn out_of_bounds_memory_access_generates_a_trap() { + let _ = env_logger::try_init(); + + let init = GuestProgramInit::new().with_bss(1); + let init = SandboxProgramInit::new(init); + let mem = init.memory_config(get_native_page_size()).unwrap(); + let mut asm = Assembler::new(); + if Sandbox::KIND != SandboxKind::Generic { + asm.push(load32_imm(r15, 0)); + } + + asm + .push(load_indirect(rax, RegSize::R64, r15, mem.rw_data_address().try_into().unwrap(), LoadKind::U32)) + .push(add_imm(RegSize::R64, rax, 1)) + .push(store_indirect(RegSize::R64, r15, i32::try_from(mem.rw_data_address()).unwrap(), rax, StoreKind::U32)) + .push(load_indirect(rax, RegSize::R64, r15, 0, LoadKind::U32)) + .push(ret()); + + let code = asm.finalize(); + let address_space = Sandbox::reserve_address_space().unwrap(); + let native_code_address = address_space.native_code_address(); + let program = Sandbox::prepare_program(init.with_code(code), address_space).unwrap(); + + let mut sandbox = Sandbox::spawn(&Default::default()).unwrap(); + { + let mut args = ExecuteArgs::new(); + args.set_program(&program); + args.set_call(native_code_address); + match sandbox.execute(args) { + Err(ExecutionError::Trap(_)) => {} + _ => panic!(), + } + + assert_eq!( + sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), + [0x01, 0x00, 0x00, 0x00] + ); + } + + // The VM still works even though it got hit with a SIGSEGV. + { + let mut args = ExecuteArgs::new(); + args.set_call(native_code_address); + match sandbox.execute(args) { + Err(ExecutionError::Trap(_)) => {} + _ => panic!(), + } + + assert_eq!( + sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), + [0x02, 0x00, 0x00, 0x00] + ); + } + } + + #[test] + fn divide_by_zero_generates_a_trap() { + if Sandbox::KIND == SandboxKind::Generic { + return; + } + + let _ = env_logger::try_init(); + + let init = GuestProgramInit::new().with_bss(4); + let init = SandboxProgramInit::new(init); + let mem = init.memory_config(get_native_page_size()).unwrap(); + let mut asm = Assembler::new(); + let code = asm + .push(load32_imm(rdx, 0)) + .push(load32_imm(rax, 1)) + .push(load32_imm(rcx, 0)) + .push(load32_imm(r8, 0x11223344)) + .push(store_abs(i32::try_from(mem.rw_data_address()).unwrap(), r8, StoreKind::U32)) + .push(idiv(RegSize::R32, rcx)) + .push(load32_imm(r8, 0x12345678)) + .push(store_abs(i32::try_from(mem.rw_data_address()).unwrap(), r8, StoreKind::U32)) + .push(ret()) + .finalize(); + + let address_space = Sandbox::reserve_address_space().unwrap(); + let native_code_address = address_space.native_code_address(); + let program = Sandbox::prepare_program(init.with_code(code), address_space).unwrap(); + let mut sandbox = Sandbox::spawn(&Default::default()).unwrap(); + + { + let mut args = ExecuteArgs::new(); + args.set_program(&program); + args.set_call(native_code_address); + match sandbox.execute(args) { + Err(ExecutionError::Trap(_)) => {} + _ => panic!(), + } + + assert_eq!( + sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), + [0x44, 0x33, 0x22, 0x11] + ); + } + } + } + } +} + +#[cfg(test)] +mod tests { + #[cfg(target_os = "linux")] + sandbox_tests!(linux); + sandbox_tests!(generic); +} diff --git a/crates/polkavm/src/sandbox/generic.rs b/crates/polkavm/src/sandbox/generic.rs new file mode 100644 index 00000000..c02f715b --- /dev/null +++ b/crates/polkavm/src/sandbox/generic.rs @@ -0,0 +1,1084 @@ +#![allow(clippy::manual_range_contains)] + +use polkavm_common::{ + error::{ExecutionError, Trap}, + program::Reg, + utils::{byte_slice_init, Access, AsUninitSliceMut}, + zygote::{ + CacheAligned, + SandboxMemoryConfig, + VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION, + VM_RPC_FLAG_RECONFIGURE, VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION, + VM_ADDR_JUMP_TABLE, + VM_ADDR_JUMP_TABLE_RETURN_TO_HOST, + VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE, + }, +}; + +use super::ExecuteArgs; + +use core::ops::Range; +use core::cell::UnsafeCell; +use core::sync::atomic::{AtomicUsize, Ordering}; +use core::mem::MaybeUninit; +use std::sync::Arc; + +use super::{OnHostcall, SandboxKind, SandboxProgramInit, get_native_page_size}; +use crate::api::{BackendAccess, MemoryAccessError}; + +// On Linux don't depend on the `libc` crate to lower the number of dependencies. +#[cfg(target_os = "linux")] +#[allow(non_camel_case_types)] +mod sys { + pub use polkavm_linux_raw::{c_void, c_int, size_t, c_ulong, siginfo_t, SIG_IGN, SIG_DFL, ucontext as ucontext_t}; + pub const SIGSEGV: c_int = polkavm_linux_raw::SIGSEGV as c_int; + pub const SIGILL: c_int = polkavm_linux_raw::SIGILL as c_int; + pub const PROT_READ: c_int = polkavm_linux_raw::PROT_READ as c_int; + pub const PROT_WRITE: c_int = polkavm_linux_raw::PROT_WRITE as c_int; + pub const PROT_EXEC: c_int = polkavm_linux_raw::PROT_EXEC as c_int; + pub const MAP_ANONYMOUS: c_int = polkavm_linux_raw::MAP_ANONYMOUS as c_int; + pub const MAP_PRIVATE: c_int = polkavm_linux_raw::MAP_PRIVATE as c_int; + pub const MAP_FIXED: c_int = polkavm_linux_raw::MAP_FIXED as c_int; + pub const MAP_FAILED: *mut c_void = !0 as *mut c_void; + pub const SA_SIGINFO: c_int = polkavm_linux_raw::SA_SIGINFO as c_int; + pub const SA_NODEFER: c_int = polkavm_linux_raw::SA_NODEFER as c_int; + + pub type sighandler_t = size_t; + + #[repr(C)] + pub struct sigset_t { + #[cfg(target_pointer_width = "32")] + __val: [u32; 32], + #[cfg(target_pointer_width = "64")] + __val: [u64; 16], + } + + #[repr(C)] + pub struct sigaction { + pub sa_sigaction: sighandler_t, + pub sa_mask: sigset_t, + pub sa_flags: c_int, + pub sa_restorer: Option, + } + + extern "C" { + pub fn mmap( + addr: *mut c_void, + len: size_t, + prot: c_int, + flags: c_int, + fd: c_int, + offset: i64 + ) -> *mut c_void; + + pub fn munmap( + addr: *mut c_void, + len: size_t + ) -> c_int; + + pub fn mprotect( + addr: *mut c_void, + len: size_t, + prot: c_int + ) -> c_int; + + pub fn sigaction( + signum: c_int, + act: *const sigaction, + oldact: *mut sigaction + ) -> c_int; + + pub fn sigemptyset(set: *mut sigset_t) -> c_int; + } +} + +#[cfg(not(target_os = "linux"))] +use libc as sys; + +use sys::{c_int, size_t, PROT_READ, PROT_WRITE, PROT_EXEC, MAP_ANONYMOUS, MAP_PRIVATE, MAP_FIXED}; +use core::ffi::c_void; + +pub(crate) const GUEST_MEMORY_TO_VMCTX_OFFSET: isize = -4096; + +fn get_guest_memory_offset() -> usize { + get_native_page_size() +} + +#[derive(Debug)] +pub struct Error(std::io::Error); + +impl core::fmt::Display for Error { + fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result { + self.0.fmt(fmt) + } +} + +impl From<&'static str> for Error { + fn from(value: &'static str) -> Self { + Self(std::io::Error::new(std::io::ErrorKind::Other, value)) + } +} + +impl From for Error { + fn from(error: std::io::Error) -> Self { + Self(error) + } +} + +pub struct Mmap { + pointer: *mut c_void, + length: usize, +} + +// SAFETY: The ownership of an mmapped piece of memory can be safely transferred to other threads. +unsafe impl Send for Mmap {} + +// SAFETY: An mmaped piece of memory can be safely accessed from multiple threads. +unsafe impl Sync for Mmap {} + +impl Mmap { + unsafe fn raw_mmap( + address: *mut c_void, + length: usize, + protection: c_int, + flags: c_int, + ) -> Result { + let pointer = { + let pointer = sys::mmap(address, length, protection, flags, -1, 0); + if pointer == sys::MAP_FAILED { + return Err(Error(std::io::Error::last_os_error())); + } + pointer + }; + + Ok(Self { pointer, length }) + } + + fn mmap_within(&mut self, offset: usize, length: usize, protection: c_int) -> Result<(), Error> { + if !offset.checked_add(length).map(|end| end <= self.length).unwrap_or(false) { + return Err("out of bounds mmap".into()) + } + + // SAFETY: The mapping is always within the bounds of the original map. + unsafe { + let pointer = self.pointer.cast::().add(offset).cast(); + core::mem::forget(Self::raw_mmap(pointer, length, protection, MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE)?); + } + + Ok(()) + } + + fn unmap_inplace(&mut self) -> Result<(), Error> { + if self.length > 0 { + // SAFETY: The map is always valid here, so it can be safely unmapped. + unsafe { + if sys::munmap(self.pointer, self.length) < 0 { + return Err(Error(std::io::Error::last_os_error())); + } + } + + self.length = 0; + self.pointer = core::ptr::NonNull::::dangling().as_ptr() as *mut c_void; + } + + Ok(()) + } + + pub fn unmap(mut self) -> Result<(), Error> { + self.unmap_inplace() + } + + pub fn reserve_address_space( + length: size_t + ) -> Result { + // SAFETY: `MAP_FIXED` is not specified, so this is always safe. + unsafe { + Mmap::raw_mmap(core::ptr::null_mut(), length, 0, MAP_ANONYMOUS | MAP_PRIVATE) + } + } + + pub fn mprotect(&mut self, offset: usize, length: usize, protection: c_int) -> Result<(), Error> { + if !offset.checked_add(length).map(|end| end <= self.length).unwrap_or(false) { + return Err("out of bounds mprotect".into()) + } + + // SAFETY: The bounds are always within the range of this map. + unsafe { + if sys::mprotect(self.pointer.add(offset), length, protection) < 0 { + return Err(Error(std::io::Error::last_os_error())); + } + } + + Ok(()) + } + + pub fn modify_and_protect(&mut self, offset: usize, length: usize, protection: c_int, callback: impl FnOnce(&mut [u8])) -> Result<(), Error> { + self.mprotect(offset, length, PROT_READ | PROT_WRITE)?; + callback(&mut self.as_slice_mut()[offset..offset + length]); + if protection != PROT_READ | PROT_WRITE { + self.mprotect(offset, length, protection)?; + } + Ok(()) + } + + pub fn as_ptr(&self) -> *const c_void { + self.pointer + } + + pub fn as_mut_ptr(&self) -> *mut c_void { + self.pointer + } + + pub fn as_slice(&self) -> &[u8] { + // SAFETY: The pointer is either always valid, or is dangling and the length is zero. + // + // The memory might not be mapped as readable, so accessing this slice can still produce + // a segfault, but this is expected due to the low level nature of this helper, and assuming + // the signal handler is correct it cannot result in unsoundness, for the same reason as to why + // the `std::process::abort` is also safe. + unsafe { core::slice::from_raw_parts(self.as_ptr().cast::(), self.length) } + } + + pub fn as_slice_mut(&mut self) -> &mut [u8] { + // SAFETY: The pointer is either always valid, or is dangling and the length is zero. + // + // The memory might not be mapped as readable or writable, so accessing this slice can still produce + // a segfault, but this is expected due to the low level nature of this helper, and assuming + // the signal handler is correct it cannot result in unsoundness, for the same reason as to why + // the `std::process::abort` is also safe. + unsafe { core::slice::from_raw_parts_mut(self.as_mut_ptr().cast::(), self.length) } + } + + pub fn len(&self) -> usize { + self.length + } +} + +impl Default for Mmap { + fn default() -> Self { + Self { + pointer: core::ptr::NonNull::::dangling().as_ptr() as *mut c_void, + length: 0, + } + } +} + +impl Drop for Mmap { + fn drop(&mut self) { + let _ = self.unmap_inplace(); + } +} + +static mut OLD_SIGSEGV: MaybeUninit = MaybeUninit::uninit(); +static mut OLD_SIGILL: MaybeUninit = MaybeUninit::uninit(); + +#[cfg(any(target_os = "macos", target_os = "freebsd"))] +static mut OLD_SIGBUS: MaybeUninit = MaybeUninit::uninit(); + +unsafe extern "C" fn signal_handler(signal: c_int, info: &sys::siginfo_t, context: &sys::ucontext_t) { + let old = match signal { + sys::SIGSEGV => &OLD_SIGSEGV, + sys::SIGILL => &OLD_SIGILL, + #[cfg(any(target_os = "macos", target_os = "freebsd"))] + sys::SIGBUS => &OLD_SIGBUS, + _ => unreachable!("received unknown signal") + }; + + let vmctx = THREAD_VMCTX.with(|thread_ctx| *thread_ctx.get()); + if !vmctx.is_null() { + let rip; + #[cfg(target_os = "linux")] + { + rip = context.uc_mcontext.rip; + } + #[cfg(target_os = "macos")] + { + rip = (*context.uc_mcontext).__ss.__rip; + } + #[cfg(target_os = "freebsd")] + { + rip = context.uc_mcontext.mc_rip as u64; + } + + let vmctx = &mut *vmctx; + if vmctx.program_range.contains(&rip) { + vmctx.native_program_counter = Some(rip); + + log::trace!("Trap triggered at 0x{rip:x}"); + trigger_trap(vmctx); + } + } + + // This signal is unrelated to anything the guest program did; proceed normally. + + let old = &*old.as_ptr(); + if old.sa_sigaction == sys::SIG_IGN || old.sa_sigaction == sys::SIG_DFL { + sys::sigaction(signal, old, core::ptr::null_mut()); + return; + } + + if old.sa_flags & sys::SA_SIGINFO != 0 { + let old_handler = core::mem::transmute::(old.sa_sigaction); + old_handler(signal, info, context); + } else { + let old_handler = core::mem::transmute::(old.sa_sigaction); + old_handler(signal); + } +} + +unsafe fn register_signal_handler_for_signal(signal: c_int, old_sa: &mut MaybeUninit) -> Result<(), Error> { + let mut sa: sys::sigaction = core::mem::zeroed(); + let old_sa = old_sa.write(core::mem::zeroed()); + + sa.sa_flags = sys::SA_SIGINFO | sys::SA_NODEFER; + sa.sa_sigaction = signal_handler as usize; + sys::sigemptyset(&mut sa.sa_mask); + if sys::sigaction(signal, &sa, old_sa) < 0 { + return Err(Error(std::io::Error::last_os_error())); + } + + Ok(()) +} + +unsafe fn register_signal_handlers() -> Result<(), Error> { + register_signal_handler_for_signal(sys::SIGSEGV, &mut OLD_SIGSEGV)?; + register_signal_handler_for_signal(sys::SIGILL, &mut OLD_SIGILL)?; + #[cfg(any(target_os = "macos", target_os = "freebsd"))] + register_signal_handler_for_signal(sys::SIGBUS, &mut OLD_SIGBUS)?; + Ok(()) +} + +fn register_signal_handlers_if_necessary() -> Result<(), Error> { + const STATE_UNINITIALIZED: usize = 0; + const STATE_INITIALIZING: usize = 1; + const STATE_FINISHED: usize = 2; + const STATE_ERROR: usize = 3; + + static FLAG: AtomicUsize = AtomicUsize::new(STATE_UNINITIALIZED); + if FLAG.load(Ordering::Relaxed) == STATE_FINISHED { + return Ok(()); + } + + match FLAG.compare_exchange(STATE_UNINITIALIZED, STATE_INITIALIZING, Ordering::Acquire, Ordering::Relaxed) { + Ok(_) => { + // SAFETY: This can only run once and any parallel invocation will + // wait for the first one that was triggered, so calling this is safe. + let result = unsafe { register_signal_handlers() }; + if let Err(error) = result { + FLAG.store(STATE_ERROR, Ordering::Release); + Err(error) + } else { + FLAG.store(STATE_FINISHED, Ordering::Release); + Ok(()) + } + }, + Err(_) => { + loop { + match FLAG.load(Ordering::Relaxed) { + STATE_INITIALIZING => continue, + STATE_FINISHED => return Ok(()), + _ => return Err("failed to set up signal handlers".into()) + } + } + } + } +} + +thread_local! { + static THREAD_VMCTX: UnsafeCell<*mut VmCtx> = const { UnsafeCell::new(core::ptr::null_mut()) }; +} + +fn trigger_trap(vmctx: &mut VmCtx) -> ! { + debug_assert_ne!(vmctx.return_address, 0); + debug_assert_ne!(vmctx.return_stack_pointer, 0); + + vmctx.trap_triggered = true; + + // SAFETY: This function can only be called while we're executing guest code. + unsafe { + core::arch::asm!(r#" + // Restore the stack pointer to its original value. + mov rsp, [{vmctx} + 8] + + // Jump back + jmp [{vmctx}] + "#, + vmctx = in(reg) vmctx, + options(noreturn) + ); + } +} + +#[repr(C)] +pub struct VmCtx { + // NOTE: These two fields are accessed from inline assembly so they shouldn't be moved! + return_address: usize, + return_stack_pointer: usize, + + program_range: Range, + trap_triggered: bool, + + pub regs: CacheAligned<[u32; 13]>, + on_hostcall: Option>, + sandbox: *mut Sandbox, + instruction_number: Option, + native_program_counter: Option, +} + +impl VmCtx { + /// Creates a fresh VM context. + pub fn new() -> Self { + VmCtx { + return_address: 0, + return_stack_pointer: 0, + trap_triggered: false, + program_range: 0..0, + + regs: CacheAligned([0; 13]), + on_hostcall: None, + sandbox: core::ptr::null_mut(), + instruction_number: None, + native_program_counter: None, + } + } + + #[inline(always)] + pub const fn regs(&self) -> &[u32; 13] { + &self.regs.0 + } +} + +// Make sure it fits within a single page on amd64. +polkavm_common::static_assert!(core::mem::size_of::() <= 4096); + +#[derive(Default)] +pub struct SandboxConfig { +} + +impl super::SandboxConfig for SandboxConfig { + fn enable_logger(&mut self, _value: bool) { + } +} + +unsafe fn vmctx_ptr(memory: &Mmap) -> *const VmCtx { + memory.as_ptr().cast::().offset(get_guest_memory_offset() as isize + GUEST_MEMORY_TO_VMCTX_OFFSET).cast() +} + +unsafe fn vmctx_mut_ptr(memory: &mut Mmap) -> *mut VmCtx { + memory.as_mut_ptr().cast::().offset(get_guest_memory_offset() as isize + GUEST_MEMORY_TO_VMCTX_OFFSET).cast() +} + +pub extern "C" fn handle_ecall(guest_memory: *mut c_void, hostcall: u64) { + // SAFETY: The pointer to the guest memory is always valid here. + let vmctx = unsafe { + &mut *guest_memory.cast::().offset(GUEST_MEMORY_TO_VMCTX_OFFSET).cast::() + }; + + let Some(on_hostcall) = vmctx.on_hostcall.as_mut().take() else { + trigger_trap(vmctx); + }; + + // SAFETY: We were called from the inside of the guest program, so no other + // mutable references to the sandbox can be concurrently alive. + let sandbox = unsafe { + &mut *vmctx.sandbox + }; + + match on_hostcall(hostcall, super::Sandbox::access(sandbox)) { + Ok(()) => {} + Err(_) => trigger_trap(vmctx) + } +} + +pub extern "C" fn handle_trace(guest_memory: *mut c_void, instruction_number: u32) { + // SAFETY: The pointer to the guest memory is always valid here. + let vmctx = unsafe { + &mut *guest_memory.cast::().offset(GUEST_MEMORY_TO_VMCTX_OFFSET).cast::() + }; + + vmctx.instruction_number = Some(instruction_number); + + let Some(on_hostcall) = vmctx.on_hostcall.as_mut().take() else { + return; + }; + + // SAFETY: We were called from the inside of the guest program, so no other + // mutable references to the sandbox can be concurrently alive. + let sandbox = unsafe { + &mut *vmctx.sandbox + }; + + match on_hostcall(polkavm_common::zygote::HOSTCALL_TRACE, super::Sandbox::access(sandbox)) { + Ok(()) => {} + Err(_) => trigger_trap(vmctx) + } +} + +pub struct SandboxProgram(Arc); + +struct SandboxProgramInner { + memory_config: SandboxMemoryConfig, + ro_data: Vec, + rw_data: Vec, + + #[allow(dead_code)] + code_memory: Mmap, +} + +enum Poison { + None, + Executing, + Poisoned, +} + +pub struct Sandbox { + poison: Poison, + program: Option, + memory: Mmap, + memory_config: SandboxMemoryConfig, + guest_memory_offset: usize, +} + +impl Drop for Sandbox { + fn drop(&mut self) { + } +} + +impl Sandbox { + #[inline] + fn vmctx(&self) -> &VmCtx { + // SAFETY: `memory` is always valid and contains a valid `VmCtx`. + unsafe { + &*vmctx_ptr(&self.memory) + } + } + + #[inline] + fn vmctx_mut(&mut self) -> &mut VmCtx { + // SAFETY: `memory` is always valid and contains a valid `VmCtx`. + unsafe { + &mut *vmctx_mut_ptr(&mut self.memory) + } + } + + fn clear_program(&mut self) -> Result<(), ExecutionError> { + let user_memory_region_size = self.memory_config.user_memory_region_size(); + if user_memory_region_size > 0 { + self.memory.mmap_within( + self.guest_memory_offset + self.memory_config.user_memory_region_address() as usize, + self.memory_config.user_memory_region_size() as usize, + 0 + )?; + + self.memory_config.clear_user_memory_sizes(); + } + + if self.memory_config.stack_size() > 0 { + self.memory.mmap_within( + self.guest_memory_offset + self.memory_config.stack_address_low() as usize, + self.memory_config.stack_size() as usize, + 0 + )?; + + self.memory_config.clear_stack_size(); + } + + self.memory_config.clear_code_size(); + self.memory_config.clear_jump_table_size(); + if let Some(program) = self.program.take() { + if let Some(program) = Arc::into_inner(program.0) { + program.code_memory.unmap()?; + } + } + + Ok(()) + } + + fn reset_memory(&mut self) -> Result<(), ExecutionError> { + if let Some(ref program) = self.program { + let program = &program.0; + let rw_data_size = self.memory_config.rw_data_size() as usize; + if rw_data_size > 0 { + let offset = self.guest_memory_offset + self.memory_config.rw_data_address() as usize; + assert!(program.rw_data.len() <= rw_data_size); + + self.memory.as_slice_mut()[offset..offset + program.rw_data.len()].copy_from_slice(&program.rw_data); + self.memory.as_slice_mut()[offset + program.rw_data.len()..offset + self.memory_config.rw_data_size() as usize].fill(0); + } + + let bss_size = self.memory_config.bss_size() as usize; + if bss_size > 0 { + self.memory.mmap_within( + self.guest_memory_offset + self.memory_config.bss_address() as usize, + bss_size, + PROT_READ | PROT_WRITE + )?; + } + + let stack_size = self.memory_config.stack_size() as usize; + if stack_size > 0 { + self.memory.mmap_within( + self.guest_memory_offset + self.memory_config.stack_address_low() as usize, + stack_size, + PROT_READ | PROT_WRITE + )?; + } + } else { + assert_eq!(self.memory_config.ro_data_size(), 0); + assert_eq!(self.memory_config.rw_data_size(), 0); + assert_eq!(self.memory_config.stack_size(), 0); + } + + Ok(()) + } + + fn bound_check_access(&self, address: u32, length: u32) -> Result<(), ()> { + use core::ops::Range; + + #[inline] + fn check(range: Range, access_range: Range) -> Result { + let range = range.start as u64..range.end as u64; + if access_range.end <= range.start || access_range.start >= range.end { + // No overlap. + Ok(false) + } else { + // There is overlap. + if access_range.start >= range.start && access_range.end <= range.end { + Ok(true) + } else { + Err(()) + } + } + } + + let range = address as u64..address as u64 + length as u64; + if check(self.memory_config.ro_data_range(), range.clone())? || check(self.memory_config.heap_range(), range.clone())? || check(self.memory_config.stack_range(), range)? { + Ok(()) + } else { + Err(()) + } + } + + fn get_memory_slice(&self, address: u32, length: u32) -> Option<&[u8]> { + self.bound_check_access(address, length).ok()?; + let range = self.guest_memory_offset + address as usize..self.guest_memory_offset + address as usize + length as usize; + Some(&self.memory.as_slice()[range]) + } + + fn get_memory_slice_mut(&mut self, address: u32, length: u32) -> Option<&mut [u8]> { + self.bound_check_access(address, length).ok()?; + let range = self.guest_memory_offset + address as usize..self.guest_memory_offset + address as usize + length as usize; + Some(&mut self.memory.as_slice_mut()[range]) + } + + fn execute_impl(&mut self, mut args: ExecuteArgs) -> Result<(), ExecutionError> { + if args.rpc_flags & VM_RPC_FLAG_RECONFIGURE != 0 { + log::trace!("Reconfiguring sandbox..."); + self.clear_program()?; + + let program = &args.program.unwrap().0; + let current = &mut self.memory_config; + let new = program.memory_config; + if new.ro_data_size() > 0 { + let offset = self.guest_memory_offset + new.ro_data_address() as usize; + let length = new.ro_data_size() as usize; + assert!(program.ro_data.len() <= length); + + self.memory.modify_and_protect(offset, length, PROT_READ, |slice| { + slice[..program.ro_data.len()].copy_from_slice(&program.ro_data); + })?; + + let memory_address = self.memory.as_ptr() as usize + offset; + log::trace!( + " New rodata range: 0x{:x}-0x{:x} (0x{:x}-0x{:x}) (0x{:x})", + memory_address, + memory_address + length, + new.ro_data_address(), + new.ro_data_address() + new.ro_data_size(), + new.ro_data_size() + ); + + current.set_ro_data_size(new.ro_data_size()).unwrap(); + } + + if new.rw_data_size() > 0 { + let offset = self.guest_memory_offset + new.rw_data_address() as usize; + let length = new.rw_data_size() as usize; + assert!(program.rw_data.len() <= length); + + self.memory.modify_and_protect(offset, length, PROT_READ | PROT_WRITE, |slice| { + slice[..program.rw_data.len()].copy_from_slice(&program.rw_data); + })?; + + let memory_address = self.memory.as_ptr() as usize + offset; + log::trace!( + " New rwdata range: 0x{:x}-0x{:x} (0x{:x}-0x{:x}) (0x{:x})", + memory_address, + memory_address + length, + new.rw_data_address(), + new.rw_data_address() + new.rw_data_size(), + new.rw_data_size() + ); + + current.set_rw_data_size(new.rw_data_size()).unwrap(); + } + + if new.bss_size() > 0 { + let offset = self.guest_memory_offset + new.bss_address() as usize; + let length = new.bss_size() as usize; + + self.memory.mprotect(offset, length, PROT_READ | PROT_WRITE)?; + + let memory_address = self.memory.as_ptr() as usize + offset; + log::trace!( + " New bss range: 0x{:x}-0x{:x} (0x{:x}-0x{:x}) (0x{:x})", + memory_address, + memory_address + length, + new.bss_address(), + new.bss_address() + new.bss_size(), + new.bss_size() + ); + + current.set_bss_size(new.bss_size()).unwrap(); + } + + if new.stack_size() > 0 { + let offset = self.guest_memory_offset + new.stack_address_low() as usize; + let length = new.stack_size() as usize; + + self.memory.mprotect(offset, length, PROT_READ | PROT_WRITE)?; + + let memory_address = self.memory.as_ptr() as usize + offset; + log::trace!( + " New stack range: 0x{:x}-0x{:x} (0x{:x}-0x{:x}) (0x{:x})", + memory_address, + memory_address + length, + new.stack_address_low(), + new.stack_address_low() + new.stack_size(), + new.stack_size() + ); + + current.set_stack_size(new.stack_size()).unwrap(); + } + + let native_page_size = get_native_page_size(); + current.set_code_size(native_page_size, new.code_size()).unwrap(); + current.set_jump_table_size(native_page_size, new.jump_table_size()).unwrap(); + self.program = Some(SandboxProgram(program.clone())); + + if *current != new { + panic!("internal error: failed to fully update memory configuration"); + } + } + + self.vmctx_mut().regs.copy_from_slice(args.initial_regs); + + let mut trap_triggered = false; + if args.rpc_address != 0 { + { + let Some(program) = self.program.as_ref() else { + return Err(ExecutionError::Trap(Trap::default())); + }; + + let code = &program.0.code_memory; + let address = code.as_ptr() as u64; + self.vmctx_mut().program_range = address..address + code.len() as u64; + } + log::trace!("Jumping to: 0x{:x}", args.rpc_address); + + let on_hostcall: Option> = args.on_hostcall.take(); + // SAFETY: Transmuting an arbitrary lifetime into a 'static lifetime is safe as long as the invariants + // that the shorter lifetime requires are still upheld. + let on_hostcall: Option> = unsafe { core::mem::transmute(on_hostcall) }; + self.vmctx_mut().on_hostcall = on_hostcall; + self.vmctx_mut().sandbox = self; + self.vmctx_mut().trap_triggered = false; + + #[allow(clippy::undocumented_unsafe_blocks)] + unsafe { + let vmctx = vmctx_mut_ptr(&mut self.memory); + THREAD_VMCTX.with(|thread_ctx| core::ptr::write(thread_ctx.get(), vmctx)); + + let guest_memory = self.memory.as_ptr().cast::().add(self.guest_memory_offset); + core::arch::asm!(r#" + push rbp + push rbx + + // Fill in the return address. + lea rbx, [rip+1f] + mov [r14], rbx + + // Fill in the return stack pointer. + mov [r14 + 8], rsp + + // Call into the guest program. + call {entry_point} + + // We will jump here in case of a trap. + 1: + + pop rbx + pop rbp + "#, + entry_point = in(reg) args.rpc_address, + // Mark all of the clobbered registers. + // + // We need to save and restore rbp and rbx manually since + // the inline assembly doesn't support using them as operands. + clobber_abi("C"), + lateout("rax") _, + lateout("rcx") _, + lateout("rdx") _, + lateout("rsi") _, + lateout("rdi") _, + lateout("r8") _, + lateout("r9") _, + lateout("r10") _, + lateout("r11") _, + lateout("r12") _, + lateout("r13") _, + inlateout("r14") vmctx => _, + in("r15") guest_memory, + + ); + + THREAD_VMCTX.with(|thread_ctx| core::ptr::write(thread_ctx.get(), core::ptr::null_mut())); + } + + trap_triggered = core::mem::replace(&mut self.vmctx_mut().trap_triggered, false); + self.vmctx_mut().sandbox = core::ptr::null_mut(); + self.vmctx_mut().on_hostcall = None; + self.vmctx_mut().return_address = 0; + self.vmctx_mut().return_stack_pointer = 0; + self.vmctx_mut().program_range = 0..0; + }; + + if args.rpc_flags & VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION != 0 { + self.clear_program()?; + } else if args.rpc_flags & VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION != 0 { + self.reset_memory()?; + } + + if trap_triggered { + return Err(ExecutionError::Trap(Trap::default())); + } + + Ok(()) + } +} + +impl super::SandboxAddressSpace for Mmap { + fn native_code_address(&self) -> u64 { + self.as_ptr() as u64 + } +} + +impl super::Sandbox for Sandbox { + const KIND: SandboxKind = SandboxKind::Generic; + + type Access<'r> = SandboxAccess<'r>; + type Config = SandboxConfig; + type Error = Error; + type Program = SandboxProgram; + type AddressSpace = Mmap; + + fn reserve_address_space() -> Result { + Mmap::reserve_address_space(VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE as usize + 0x200000000) + } + + fn prepare_program(init: SandboxProgramInit, mut map: Self::AddressSpace) -> Result { + let native_page_size = get_native_page_size(); + let cfg = init.memory_config(native_page_size)?; + + assert_eq!(cfg.code_size() % native_page_size, 0); + assert!(init.code.len() <= cfg.code_size()); + + let jump_table_offset = cfg.code_size(); + let sysreturn_offset = jump_table_offset + (VM_ADDR_JUMP_TABLE_RETURN_TO_HOST - VM_ADDR_JUMP_TABLE) as usize; + + map.modify_and_protect(0, cfg.code_size(), PROT_EXEC, |slice| { + slice[..init.code.len()].copy_from_slice(init.code); + })?; + + map.modify_and_protect(jump_table_offset, cfg.jump_table_size(), PROT_READ, |slice| { + slice[..init.jump_table.len()].copy_from_slice(init.jump_table); + })?; + + map.modify_and_protect(sysreturn_offset, native_page_size, PROT_READ, |slice| { + slice[..8].copy_from_slice(&init.sysreturn_address.to_le_bytes()); + })?; + + log::trace!( + "New code range: 0x{:x}-0x{:x} (0x{:x})", + map.as_ptr() as u64, + map.as_ptr() as u64 + cfg.code_size() as u64, + cfg.code_size() + ); + + log::trace!( + "New jump table range: 0x{:x}-0x{:x} (0x{:x})", + map.as_ptr() as u64 + jump_table_offset as u64, + map.as_ptr() as u64 + jump_table_offset as u64 + cfg.jump_table_size() as u64, + cfg.jump_table_size() + ); + + log::trace!( + "New sysreturn address: 0x{:x} (set at 0x{:x})", + init.sysreturn_address, + map.as_ptr() as u64 + sysreturn_offset as u64 + ); + + Ok(SandboxProgram(Arc::new(SandboxProgramInner { + memory_config: cfg, + ro_data: init.ro_data().to_vec(), + rw_data: init.rw_data().to_vec(), + code_memory: map, + }))) + } + + fn spawn(_config: &SandboxConfig) -> Result { + register_signal_handlers_if_necessary()?; + + let guest_memory_offset = get_guest_memory_offset(); + let mut memory = Mmap::reserve_address_space(guest_memory_offset + 0x100000000)?; + + // Make the space for VmCtx read-write. + polkavm_common::static_assert!(GUEST_MEMORY_TO_VMCTX_OFFSET < 0); + memory.mprotect(0, guest_memory_offset, PROT_READ | PROT_WRITE)?; + + // SAFETY: We just mmaped this and made it read-write. + unsafe { + core::ptr::write(vmctx_mut_ptr(&mut memory), VmCtx::new()); + } + + Ok(Sandbox { + poison: Poison::None, + program: None, + memory, + memory_config: SandboxMemoryConfig::empty(), + guest_memory_offset, + }) + } + + fn execute(&mut self, args: ExecuteArgs) -> Result<(), ExecutionError> { + if !matches!(self.poison, Poison::None) { + return Err(ExecutionError::Error("sandbox has been poisoned".into())); + } + + self.poison = Poison::Executing; + match self.execute_impl(args) { + result @ Err(ExecutionError::Error(_)) => { + self.poison = Poison::Poisoned; + result + } + result @ (Ok(()) | Err(ExecutionError::Trap(_))) => { + self.poison = Poison::None; + result + } + } + } + + #[inline] + fn access(&mut self) -> SandboxAccess { + SandboxAccess { sandbox: self } + } +} + +pub struct SandboxAccess<'a> { + sandbox: &'a mut Sandbox, +} + +impl<'a> From> for BackendAccess<'a> { + fn from(access: SandboxAccess<'a>) -> Self { + BackendAccess::CompiledGeneric(access) + } +} + +impl<'a> Access<'a> for SandboxAccess<'a> { + type Error = MemoryAccessError<&'static str>; + + fn get_reg(&self, reg: Reg) -> u32 { + if reg == Reg::Zero { + return 0; + } + + assert!(!matches!(self.sandbox.poison, Poison::Poisoned), "sandbox has been poisoned"); + self.sandbox.vmctx().regs[reg as usize - 1] + } + + fn set_reg(&mut self, reg: Reg, value: u32) { + if reg == Reg::Zero { + return; + } + + assert!(!matches!(self.sandbox.poison, Poison::Poisoned), "sandbox has been poisoned"); + self.sandbox.vmctx_mut().regs[reg as usize - 1] = value; + } + + fn read_memory_into_slice<'slice, T>(&self, address: u32, buffer: &'slice mut T) -> Result<&'slice mut [u8], Self::Error> + where + T: ?Sized + AsUninitSliceMut, + { + let buffer = buffer.as_uninit_slice_mut(); + log::trace!( + "Reading memory: 0x{:x}-0x{:x} ({} bytes)", + address, + address as usize + buffer.len(), + buffer.len() + ); + + if matches!(self.sandbox.poison, Poison::Poisoned) { + return Err(MemoryAccessError { + address, + length: buffer.len() as u64, + error: "read failed: sandbox has been poisoned", + }); + } + + let Some(slice) = self.sandbox.get_memory_slice(address, buffer.len() as u32) else { + return Err(MemoryAccessError { + address, + length: buffer.len() as u64, + error: "out of range read", + }); + }; + + Ok(byte_slice_init(buffer, slice)) + } + + fn write_memory(&mut self, address: u32, data: &[u8]) -> Result<(), Self::Error> { + log::trace!( + "Writing memory: 0x{:x}-0x{:x} ({} bytes)", + address, + address as usize + data.len(), + data.len() + ); + + if matches!(self.sandbox.poison, Poison::Poisoned) { + return Err(MemoryAccessError { + address, + length: data.len() as u64, + error: "write failed: sandbox has been poisoned", + }); + } + + let Some(slice) = self.sandbox.get_memory_slice_mut(address, data.len() as u32) else { + return Err(MemoryAccessError { + address, + length: data.len() as u64, + error: "out of range write", + }); + }; + + slice.copy_from_slice(data); + Ok(()) + } + + fn program_counter(&self) -> Option { + self.sandbox.vmctx().instruction_number + } + + fn native_program_counter(&self) -> Option { + self.sandbox.vmctx().native_program_counter + } +} diff --git a/crates/polkavm-linux-sandbox/src/lib.rs b/crates/polkavm/src/sandbox/linux.rs similarity index 74% rename from crates/polkavm-linux-sandbox/src/lib.rs rename to crates/polkavm/src/sandbox/linux.rs index 2d7665aa..2d7d26ee 100644 --- a/crates/polkavm-linux-sandbox/src/lib.rs +++ b/crates/polkavm/src/sandbox/linux.rs @@ -1,41 +1,42 @@ -#![doc = include_str!("../README.md")] -#![allow(clippy::collapsible_else_if)] -#![allow(clippy::len_without_is_empty)] +#![allow(clippy::undocumented_unsafe_blocks)] #![allow(clippy::manual_range_contains)] -#![cfg(all(target_os = "linux", target_arch = "x86_64"))] extern crate polkavm_linux_raw as linux_raw; use polkavm_common::{ - abi::VM_PAGE_SIZE, error::{ExecutionError, Trap}, - init::GuestProgramInit, program::Reg, utils::{align_to_next_page_usize, slice_assume_init_mut, Access, AsUninitSliceMut}, zygote::{ SandboxMemoryConfig, VmCtx, SANDBOX_EMPTY_NATIVE_PROGRAM_COUNTER, SANDBOX_EMPTY_NTH_INSTRUCTION, VMCTX_FUTEX_BUSY, - VMCTX_FUTEX_HOSTCALL, VMCTX_FUTEX_IDLE, VMCTX_FUTEX_INIT, VMCTX_FUTEX_TRAP, VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION, - VM_RPC_FLAG_RECONFIGURE, VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION, VM_RPC_FLAG_SIGSTOP_BEFORE_EXECUTION, + VMCTX_FUTEX_HOSTCALL, VMCTX_FUTEX_IDLE, VMCTX_FUTEX_INIT, VMCTX_FUTEX_TRAP, VM_ADDR_NATIVE_CODE, }, }; +use super::ExecuteArgs; + pub use linux_raw::Error; -use core::ffi::{c_int, c_long, c_uint}; +use core::ffi::{c_int, c_uint}; use core::sync::atomic::Ordering; use linux_raw::{abort, cstr, syscall_readonly, Fd, Mmap, STDERR_FILENO, STDIN_FILENO}; use std::time::Instant; +use super::{OnHostcall, SandboxKind, SandboxProgramInit, get_native_page_size}; +use crate::api::{BackendAccess, MemoryAccessError}; + pub struct SandboxConfig { - pub enable_logger: bool, + enable_logger: bool, } impl SandboxConfig { pub fn new() -> Self { SandboxConfig { enable_logger: false } } +} - pub fn enable_logger(&mut self, value: bool) { +impl super::SandboxConfig for SandboxConfig { + fn enable_logger(&mut self, value: bool) { self.enable_logger = value; } } @@ -218,14 +219,12 @@ impl ChildProcess { Ok(ok) => unsafe { if ok.si_signo() == 0 && ok.si_pid() == 0 { Ok(ChildStatus::Running) + } else if linux_raw::WIFSIGNALED(ok.si_status()) { + Ok(ChildStatus::ExitedDueToSignal(linux_raw::WTERMSIG(ok.si_status()))) + } else if linux_raw::WIFEXITED(ok.si_status()) { + Ok(ChildStatus::Exited(linux_raw::WEXITSTATUS(ok.si_status()))) } else { - if linux_raw::WIFSIGNALED(ok.si_status()) { - Ok(ChildStatus::ExitedDueToSignal(linux_raw::WTERMSIG(ok.si_status()))) - } else if linux_raw::WIFEXITED(ok.si_status()) { - Ok(ChildStatus::Exited(linux_raw::WEXITSTATUS(ok.si_status()))) - } else { - Err(Error::from_last_os_error("waitid failed: internal error: unexpected state")) - } + Err(Error::from_last_os_error("waitid failed: internal error: unexpected state")) } }, Err(error) => { @@ -259,18 +258,6 @@ impl Drop for ChildProcess { } } -fn get_native_page_size() -> usize { - // This is literally the only thing we need from `libc`, so instead of including - // the whole crate let's just define these ourselves. - - const _SC_PAGESIZE: c_int = 30; - extern "C" { - fn sysconf(name: c_int) -> c_long; - } - - unsafe { sysconf(_SC_PAGESIZE) as usize } -} - #[cfg(polkavm_dev_use_built_zygote)] static ZYGOTE_BLOB: &[u8] = include_bytes!("../../polkavm-zygote/target/x86_64-unknown-linux-gnu/release/polkavm-zygote"); @@ -504,103 +491,6 @@ pub struct SandboxProgram { sysreturn_address: u64, } -#[derive(Copy, Clone)] -pub struct SandboxProgramInit<'a> { - guest_init: GuestProgramInit<'a>, - code: &'a [u8], - jump_table: &'a [u8], - sysreturn_address: u64, -} - -impl<'a> Default for SandboxProgramInit<'a> { - fn default() -> Self { - Self::new(Default::default()) - } -} - -impl<'a> core::ops::Deref for SandboxProgramInit<'a> { - type Target = GuestProgramInit<'a>; - fn deref(&self) -> &Self::Target { - &self.guest_init - } -} - -impl<'a> SandboxProgramInit<'a> { - pub fn new(guest_init: GuestProgramInit<'a>) -> Self { - Self { - guest_init, - code: &[], - jump_table: &[], - sysreturn_address: 0, - } - } - - pub fn with_code(mut self, code: &'a [u8]) -> Self { - self.code = code; - self - } - - pub fn with_jump_table(mut self, jump_table: &'a [u8]) -> Self { - self.jump_table = jump_table; - self - } - - pub fn with_sysreturn_address(mut self, address: u64) -> Self { - self.sysreturn_address = address; - self - } - - fn memory_config(&self, native_page_size: usize) -> Result { - let mut config = SandboxMemoryConfig::empty(); - config.set_guest_config(self.guest_init.memory_config()?); - config.set_code_size(native_page_size, self.code.len())?; - config.set_jump_table_size(native_page_size, self.jump_table.len())?; - - Ok(config) - } -} - -impl SandboxProgram { - pub fn new(init: SandboxProgramInit) -> Result { - let native_page_size = get_native_page_size(); - assert!( - native_page_size <= VM_PAGE_SIZE as usize && VM_PAGE_SIZE as usize % native_page_size == 0, - "unsupported native page size: {}", - native_page_size - ); - - let cfg = init.memory_config(native_page_size)?; - let memfd = prepare_sealed_memfd( - cstr!("polkavm_program"), - cfg.ro_data_size() as usize + cfg.rw_data_size() as usize + cfg.code_size() + cfg.jump_table_size(), - |buffer| { - let mut offset = 0; - macro_rules! append { - ($slice:expr, $length:expr) => { - assert!($slice.len() <= $length as usize); - buffer[offset..offset + $slice.len()].copy_from_slice($slice); - #[allow(unused_assignments)] - { - offset += $length as usize; - } - }; - } - - append!(init.ro_data(), cfg.ro_data_size()); - append!(init.rw_data(), cfg.rw_data_size()); - append!(init.code, cfg.code_size()); - append!(init.jump_table, cfg.jump_table_size()); - }, - )?; - - Ok(Self { - memfd, - memory_config: cfg, - sysreturn_address: init.sysreturn_address, - }) - } -} - #[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct Map<'a> { pub start: u64, @@ -729,8 +619,6 @@ unsafe fn set_message(vmctx: &VmCtx, message: core::fmt::Arguments) { *vmctx.message_length.get() = length as u32; } -pub type OnHostcall<'a> = &'a mut dyn for<'r> FnMut(u64, SandboxAccess<'r>) -> Result<(), Trap>; - pub struct Sandbox { vmctx_mmap: Mmap, child: ChildProcess, @@ -760,8 +648,59 @@ impl Drop for Sandbox { } } -impl Sandbox { - pub fn spawn(config: &SandboxConfig) -> Result { +impl super::SandboxAddressSpace for () { + fn native_code_address(&self) -> u64 { + VM_ADDR_NATIVE_CODE + } +} + +impl super::Sandbox for Sandbox { + const KIND: SandboxKind = SandboxKind::Linux; + + type Access<'r> = SandboxAccess<'r>; + type Config = SandboxConfig; + type Error = Error; + type Program = SandboxProgram; + type AddressSpace = (); + + fn reserve_address_space() -> Result { + Ok(()) + } + + fn prepare_program(init: SandboxProgramInit, _: Self::AddressSpace) -> Result { + let native_page_size = get_native_page_size(); + let cfg = init.memory_config(native_page_size)?; + let memfd = prepare_sealed_memfd( + cstr!("polkavm_program"), + cfg.ro_data_size() as usize + cfg.rw_data_size() as usize + cfg.code_size() + cfg.jump_table_size(), + |buffer| { + let mut offset = 0; + macro_rules! append { + ($slice:expr, $length:expr) => { + assert!($slice.len() <= $length as usize); + buffer[offset..offset + $slice.len()].copy_from_slice($slice); + #[allow(unused_assignments)] + { + offset += $length as usize; + } + }; + } + + append!(init.ro_data(), cfg.ro_data_size()); + append!(init.rw_data(), cfg.rw_data_size()); + append!(init.code, cfg.code_size()); + append!(init.jump_table, cfg.jump_table_size()); + }, + )?; + + Ok(SandboxProgram { + memfd, + memory_config: cfg, + sysreturn_address: init.sysreturn_address, + }) + } + + fn spawn(config: &SandboxConfig) -> Result { let sigset = Sigmask::block_all_signals()?; let zygote_memfd = prepare_zygote()?; let (vmctx_memfd, vmctx_mmap) = prepare_vmctx()?; @@ -998,6 +937,41 @@ impl Sandbox { }) } + fn execute(&mut self, mut args: ExecuteArgs) -> Result<(), ExecutionError> { + self.wait_if_necessary(match args.on_hostcall { + Some(ref mut on_hostcall) => Some(&mut *on_hostcall), + None => None, + })?; + + unsafe { + *self.vmctx().rpc_address.get() = args.rpc_address; + *self.vmctx().rpc_flags.get() = args.rpc_flags; + if let Some(program) = args.program { + *self.vmctx().new_memory_config.get() = program.memory_config; + *self.vmctx().new_sysreturn_address.get() = program.sysreturn_address; + } + + (*self.vmctx().regs().get()).copy_from_slice(args.initial_regs); + self.vmctx().futex.store(VMCTX_FUTEX_BUSY, Ordering::Release); + linux_raw::sys_futex_wake_one(&self.vmctx().futex)?; + + if let Some(program) = args.program { + // TODO: This can block forever. + linux_raw::sendfd(self.socket.borrow(), program.memfd.borrow())?; + } + } + + self.wait_if_necessary(args.on_hostcall)?; + Ok(()) + } + + #[inline] + fn access(&mut self) -> SandboxAccess { + SandboxAccess { sandbox: self } + } +} + +impl Sandbox { #[inline] fn vmctx(&self) -> &VmCtx { unsafe { &*self.vmctx_mmap.as_ptr().cast::() } @@ -1005,7 +979,7 @@ impl Sandbox { #[inline(never)] #[cold] - fn wait(&mut self, mut on_hostcall: Option) -> Result<(), ExecutionError> { + fn wait(&mut self, mut on_hostcall: Option>) -> Result<(), ExecutionError> { let mut spin_target = 0; 'outer: loop { self.count_wait_loop_start += 1; @@ -1047,7 +1021,7 @@ impl Sandbox { spin_target = 512; } - match on_hostcall(hostcall, self.access()) { + match on_hostcall(hostcall, super::Sandbox::access(self)) { Ok(()) => { self.vmctx().futex.store(VMCTX_FUTEX_BUSY, Ordering::Release); linux_raw::sys_futex_wake_one(&self.vmctx().futex)?; @@ -1099,54 +1073,27 @@ impl Sandbox { } #[inline] - fn wait_if_necessary(&mut self, on_hostcall: Option) -> Result<(), ExecutionError> { + fn wait_if_necessary(&mut self, on_hostcall: Option>) -> Result<(), ExecutionError> { if self.vmctx().futex.load(Ordering::Relaxed) != VMCTX_FUTEX_IDLE { self.wait(on_hostcall)?; } Ok(()) } - - pub fn execute(&mut self, mut args: ExecuteArgs) -> Result<(), ExecutionError> { - self.wait_if_necessary(match args.on_hostcall { - Some(ref mut on_hostcall) => Some(&mut *on_hostcall), - None => None, - })?; - - unsafe { - *self.vmctx().rpc_address.get() = args.rpc_address; - *self.vmctx().rpc_flags.get() = args.rpc_flags; - if let Some(program) = args.program { - *self.vmctx().new_memory_config.get() = program.memory_config; - *self.vmctx().new_sysreturn_address.get() = program.sysreturn_address; - } - - (*self.vmctx().regs().get()).copy_from_slice(args.initial_regs); - self.vmctx().futex.store(VMCTX_FUTEX_BUSY, Ordering::Release); - linux_raw::sys_futex_wake_one(&self.vmctx().futex)?; - - if let Some(program) = args.program { - // TODO: This can block forever. - linux_raw::sendfd(self.socket.borrow(), program.memfd.borrow())?; - } - } - - self.wait_if_necessary(args.on_hostcall)?; - Ok(()) - } - - #[inline] - pub fn access(&mut self) -> SandboxAccess { - SandboxAccess { sandbox: self } - } } pub struct SandboxAccess<'a> { sandbox: &'a mut Sandbox, } +impl<'a> From> for BackendAccess<'a> { + fn from(access: SandboxAccess<'a>) -> Self { + BackendAccess::CompiledLinux(access) + } +} + impl<'a> Access<'a> for SandboxAccess<'a> { - type Error = linux_raw::Error; + type Error = MemoryAccessError; fn get_reg(&self, reg: Reg) -> u32 { if reg == Reg::Zero { @@ -1168,7 +1115,7 @@ impl<'a> Access<'a> for SandboxAccess<'a> { } } - fn read_memory_into_slice<'slice, T>(&self, address: u32, buffer: &'slice mut T) -> Result<&'slice mut [u8], Error> + fn read_memory_into_slice<'slice, T>(&self, address: u32, buffer: &'slice mut T) -> Result<&'slice mut [u8], Self::Error> where T: ?Sized + AsUninitSliceMut, { @@ -1181,36 +1128,71 @@ impl<'a> Access<'a> for SandboxAccess<'a> { ); if address as usize + slice.len() > 0xffffffff { - return Err(Error::from_str("out of range read")); + return Err(MemoryAccessError { + address, + length: slice.len() as u64, + error: Error::from_str("out of range read"), + }); } let length = slice.len(); - let actual_length = linux_raw::vm_read_memory(self.sandbox.child.pid, [slice], [(address as usize, length)])?; - if length != actual_length { - return Err(Error::from_str("incomplete read")); + match linux_raw::vm_read_memory(self.sandbox.child.pid, [slice], [(address as usize, length)]) { + Ok(actual_length) if actual_length == length => { + unsafe { Ok(slice_assume_init_mut(slice)) } + }, + Ok(_) => { + Err(MemoryAccessError { + address, + length: slice.len() as u64, + error: Error::from_str("incomplete read"), + }) + }, + Err(error) => { + Err(MemoryAccessError { + address, + length: slice.len() as u64, + error, + }) + } } - - unsafe { Ok(slice_assume_init_mut(slice)) } } - fn write_memory(&mut self, address: u32, data: &[u8]) -> Result<(), Error> { + fn write_memory(&mut self, address: u32, data: &[u8]) -> Result<(), Self::Error> { log::trace!( "Writing memory: 0x{:x}-0x{:x} ({} bytes)", address, address as usize + data.len(), data.len() ); + if address as usize + data.len() > 0xffffffff { - return Err(Error::from_str("out of range write")); + return Err(MemoryAccessError { + address, + length: data.len() as u64, + error: Error::from_str("out of range write"), + }); } let length = data.len(); - let actual_length = linux_raw::vm_write_memory(self.sandbox.child.pid, [data], [(address as usize, length)])?; - if length != actual_length { - return Err(Error::from_str("incomplete write")); + match linux_raw::vm_write_memory(self.sandbox.child.pid, [data], [(address as usize, length)]) { + Ok(actual_length) if actual_length == length => { + Ok(()) + }, + Ok(_) => { + Err(MemoryAccessError { + address, + length: data.len() as u64, + error: Error::from_str("incomplete write"), + }) + }, + Err(error) => { + Err(MemoryAccessError { + address, + length: data.len() as u64, + error, + }) + } } - - Ok(()) } fn program_counter(&self) -> Option { @@ -1233,279 +1215,3 @@ impl<'a> Access<'a> for SandboxAccess<'a> { } } } - -pub struct ExecuteArgs<'a> { - rpc_address: u64, - rpc_flags: u32, - program: Option<&'a SandboxProgram>, - on_hostcall: Option>, - initial_regs: &'a [u32], -} - -impl<'a> Default for ExecuteArgs<'a> { - fn default() -> Self { - Self::new() - } -} - -impl<'a> ExecuteArgs<'a> { - #[inline] - pub fn new() -> Self { - static EMPTY_REGS: &[u32; Reg::ALL_NON_ZERO.len()] = &[0; Reg::ALL_NON_ZERO.len()]; - ExecuteArgs { - rpc_address: 0, - rpc_flags: 0, - program: None, - on_hostcall: None, - initial_regs: EMPTY_REGS, - } - } - - #[inline] - pub fn set_program(&mut self, program: &'a SandboxProgram) { - self.rpc_flags |= VM_RPC_FLAG_RECONFIGURE; - self.program = Some(program); - } - - #[inline] - pub fn set_reset_memory_after_execution(&mut self) { - self.rpc_flags |= VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION; - } - - #[inline] - pub fn set_clear_program_after_execution(&mut self) { - self.rpc_flags |= VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION; - } - - #[inline] - pub fn set_send_sigstop_before_execution(&mut self) { - self.rpc_flags |= VM_RPC_FLAG_SIGSTOP_BEFORE_EXECUTION; - } - - #[inline] - pub fn set_call(&mut self, address: u64) { - self.rpc_address = address; - } - - #[inline] - pub fn set_on_hostcall(&mut self, callback: OnHostcall<'a>) { - self.on_hostcall = Some(callback); - } - - #[inline] - pub fn set_initial_regs(&mut self, regs: &'a [u32]) { - assert_eq!(regs.len(), Reg::ALL_NON_ZERO.len()); - self.initial_regs = regs; - } -} - -#[cfg(test)] -mod tests { - use super::*; - use polkavm_assembler::amd64::inst::*; - use polkavm_assembler::amd64::Reg::*; - use polkavm_assembler::amd64::{LoadKind, RegSize, StoreKind}; - use polkavm_assembler::Assembler; - use polkavm_common::zygote::VM_ADDR_NATIVE_CODE; - - #[test] - fn basic_execution_works() { - let _ = env_logger::try_init(); - - let init = GuestProgramInit::new().with_ro_data(&[0xaa, 0xbb]).with_bss(1); - let init = SandboxProgramInit::new(init); - - let mem = init.memory_config(get_native_page_size()).unwrap(); - let mut asm = Assembler::new(); - let code = asm - .push(load_abs(rax, mem.ro_data_address().try_into().unwrap(), LoadKind::U32)) - .push(store_abs(i32::try_from(mem.rw_data_address()).unwrap(), rax, StoreKind::U8)) - .push(store_abs(i32::try_from(mem.rw_data_address()).unwrap() + 4, rax, StoreKind::U16)) - .push(ret()) - .finalize(); - - let program = SandboxProgram::new(init.with_code(code)).unwrap(); - let mut args = ExecuteArgs::new(); - args.set_program(&program); - args.set_call(VM_ADDR_NATIVE_CODE); - - let mut config = SandboxConfig::default(); - config.enable_logger(true); - - let mut sandbox = Sandbox::spawn(&config).unwrap(); - sandbox.execute(args).unwrap(); - - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 8).unwrap(), - [0xaa, 0x00, 0x00, 0x00, 0xaa, 0xbb, 0x00, 0x00,] - ); - } - - #[test] - fn program_memory_can_be_reused_and_cleared() { - let _ = env_logger::try_init(); - - let init = GuestProgramInit::new().with_bss(1); - let init = SandboxProgramInit::new(init); - let mem = init.memory_config(get_native_page_size()).unwrap(); - let mut asm = Assembler::new(); - let code = asm - .push(load_abs(rax, mem.rw_data_address().try_into().unwrap(), LoadKind::U32)) - .push(add_imm(RegSize::R64, rax, 1)) - .push(store_abs(i32::try_from(mem.rw_data_address()).unwrap(), rax, StoreKind::U32)) - .push(ret()) - .finalize(); - - let program = SandboxProgram::new(init.with_code(code)).unwrap(); - - let mut sandbox = Sandbox::spawn(&Default::default()).unwrap(); - assert!(sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).is_err()); - - { - let mut args = ExecuteArgs::new(); - args.set_program(&program); - sandbox.execute(args).unwrap(); - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x00, 0x00, 0x00, 0x00] - ); - } - - { - let mut args = ExecuteArgs::new(); - args.set_call(VM_ADDR_NATIVE_CODE); - sandbox.execute(args).unwrap(); - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x01, 0x00, 0x00, 0x00] - ); - } - - { - let mut args = ExecuteArgs::new(); - args.set_call(VM_ADDR_NATIVE_CODE); - sandbox.execute(args).unwrap(); - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x02, 0x00, 0x00, 0x00] - ); - } - - { - let mut args = ExecuteArgs::new(); - args.set_call(VM_ADDR_NATIVE_CODE); - args.set_reset_memory_after_execution(); - sandbox.execute(args).unwrap(); - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x00, 0x00, 0x00, 0x00] - ); - } - - { - let mut args = ExecuteArgs::new(); - args.set_call(VM_ADDR_NATIVE_CODE); - sandbox.execute(args).unwrap(); - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x01, 0x00, 0x00, 0x00] - ); - } - - { - let mut args = ExecuteArgs::new(); - args.set_clear_program_after_execution(); - sandbox.execute(args).unwrap(); - assert!(sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).is_err()); - } - } - - #[test] - fn out_of_bounds_memory_access_generates_a_trap() { - let _ = env_logger::try_init(); - - let init = GuestProgramInit::new().with_bss(1); - let init = SandboxProgramInit::new(init); - let mem = init.memory_config(get_native_page_size()).unwrap(); - let mut asm = Assembler::new(); - let code = asm - .push(load_abs(rax, mem.rw_data_address().try_into().unwrap(), LoadKind::U32)) - .push(add_imm(RegSize::R64, rax, 1)) - .push(store_abs(i32::try_from(mem.rw_data_address()).unwrap(), rax, StoreKind::U32)) - .push(load_abs(rax, 0, LoadKind::U32)) - .push(ret()) - .finalize(); - - let program = SandboxProgram::new(init.with_code(code)).unwrap(); - - let mut sandbox = Sandbox::spawn(&Default::default()).unwrap(); - { - let mut args = ExecuteArgs::new(); - args.set_program(&program); - args.set_call(VM_ADDR_NATIVE_CODE); - match sandbox.execute(args) { - Err(ExecutionError::Trap(_)) => {} - _ => panic!(), - } - - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x01, 0x00, 0x00, 0x00] - ); - } - - // The VM still works even though it got hit with a SIGSEGV. - { - let mut args = ExecuteArgs::new(); - args.set_call(VM_ADDR_NATIVE_CODE); - match sandbox.execute(args) { - Err(ExecutionError::Trap(_)) => {} - _ => panic!(), - } - - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x02, 0x00, 0x00, 0x00] - ); - } - } - - #[test] - fn divide_by_zero_generates_a_trap() { - let _ = env_logger::try_init(); - - let init = GuestProgramInit::new().with_bss(4); - let init = SandboxProgramInit::new(init); - let mem = init.memory_config(get_native_page_size()).unwrap(); - let mut asm = Assembler::new(); - let code = asm - .push(load32_imm(rdx, 0)) - .push(load32_imm(rax, 1)) - .push(load32_imm(rcx, 0)) - .push(load32_imm(r8, 0x11223344)) - .push(store_abs(i32::try_from(mem.rw_data_address()).unwrap(), r8, StoreKind::U32)) - .push(idiv(RegSize::R32, rcx)) - .push(load32_imm(r8, 0x12345678)) - .push(store_abs(i32::try_from(mem.rw_data_address()).unwrap(), r8, StoreKind::U32)) - .push(ret()) - .finalize(); - - let program = SandboxProgram::new(init.with_code(code)).unwrap(); - let mut sandbox = Sandbox::spawn(&Default::default()).unwrap(); - - { - let mut args = ExecuteArgs::new(); - args.set_program(&program); - args.set_call(VM_ADDR_NATIVE_CODE); - match sandbox.execute(args) { - Err(ExecutionError::Trap(_)) => {} - _ => panic!(), - } - - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x44, 0x33, 0x22, 0x11] - ); - } - } -} diff --git a/crates/polkavm-linux-sandbox/src/polkavm-zygote b/crates/polkavm/src/sandbox/polkavm-zygote similarity index 54% rename from crates/polkavm-linux-sandbox/src/polkavm-zygote rename to crates/polkavm/src/sandbox/polkavm-zygote index 2163da75..2b59ed0b 100755 Binary files a/crates/polkavm-linux-sandbox/src/polkavm-zygote and b/crates/polkavm/src/sandbox/polkavm-zygote differ diff --git a/examples/hosts/doom/Cargo.toml b/examples/hosts/doom/Cargo.toml new file mode 100644 index 00000000..6fa51ff3 --- /dev/null +++ b/examples/hosts/doom/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "doom-host" +version = "0.1.0" +edition = "2021" +publish = false + +[dependencies] +env_logger = { version = "0.10.0", default-features = false } +polkavm = { path = "../../../crates/polkavm" } +sdl2 = { version = "0.35.2", features = ["bundled"] } diff --git a/examples/hosts/doom/README.md b/examples/hosts/doom/README.md new file mode 100644 index 00000000..8ead4a46 --- /dev/null +++ b/examples/hosts/doom/README.md @@ -0,0 +1,21 @@ +# DOOM for PolkaVM + +This is a port of DOOM which runs under PolkaVM. + +You can find the source code of the guest program [here](https://github.com/koute/polkadoom). + +## Running on Linux + +``` +cargo run --release +``` + +## Running on macOS + +``` +LIBRARY_PATH="$LIBRARY_PATH:$(brew --prefix)/lib" POLKAVM_ALLOW_INSECURE=1 POLKAVM_SANDBOX=generic cargo run --target=x86_64-apple-darwin --release +``` + +## Running on other operating systems + +It will run, but it will use an interpreter, which at this moment is *very* slow and won't run full speed. diff --git a/examples/hosts/doom/roms/README.md b/examples/hosts/doom/roms/README.md new file mode 100644 index 00000000..ae19007e --- /dev/null +++ b/examples/hosts/doom/roms/README.md @@ -0,0 +1,3 @@ +This directory contains the following: + * `doom1.wad` - the freely redistributable shareware WAD file for the original DOOM, see `doom-wad-shareware-license.txt` for license details. + * `doom.polkavm` - the DOOM engine binary, licensed under GPLv2+; sources are available [here](https://github.com/koute/polkadoom). diff --git a/examples/hosts/doom/roms/doom-wad-shareware-license.txt b/examples/hosts/doom/roms/doom-wad-shareware-license.txt new file mode 100644 index 00000000..97c96998 --- /dev/null +++ b/examples/hosts/doom/roms/doom-wad-shareware-license.txt @@ -0,0 +1,126 @@ +The Doom 1 shareware WAD file is (C) Copyright id Software. + + LIMITED USE SOFTWARE LICENSE AGREEMENT + +This Limited Use Software License Agreement (the "Agreement") is a legal +agreement between you, the end-user, and Id Software, Inc. ("ID"). By +continuing the installation of this game program, by loading or running +the game, or by placing or copying the game program onto your computer +hard drive, you are agreeing to be bound by the terms of this Agreement. + +ID SOFTWARE LICENSE + + 1. Grant of License. ID grants to you the right to use the +Id Software game program (the "Software"), which is the shareware version +or episode one of the game program. For purposes of this section, "use" +means loading the Software into RAM, as well as installation on a hard disk +or other storage device. You may not: modify, translate, disassemble, +decompile, reverse engineer, or create derivative works based upon the +Software. You agree thatd the Software will not be shipped, transferred or +exported into any country in violation of the U.S. Export Administration Act +and that you will not utilize, in any other manner, the Software in violation +of any applicable law. + + 2. Copyright. The Software is owned by ID and is protected by United +States copyright laws and international treaty provisions. You must treat +the Software like any other copyrighted material, except that you may make +copies of the Software to give to other persons. You may not charge or +receive any consideration from any other person for the receipt or use of +the Software without receiving ID's prior written consent as specified in the +VENDOR.DOC file. You agree to use your best efforts to see that any user of +the Software licensed hereunder complies with this Agreement. + + 3. Limited Warranty. ID warrants that if properly installed and +operated on a computer for which it is designed, the Software will perform +substantially in accordance with its designed purpose for a period of ninety +(90) days from the date the Software is first obtained by an end-user. ID's +entire liability and your exclusive remedy shall be, at ID's option, either +(a) return of the retail price paid, if any, or (b) repair or replacement of +the Software that does not meet ID's Limited Warranty. To make a warranty +claim, return the Software to the point of purchase, accompanied by proof of +purchase, your name, your address, and a statement of defect, or return the +Software with the above information to ID. This Limited Warranty is void if +failure of the Software has resulted in whole or in part from accident, +abuse, misapplication or violation of this Agreement. Any replacement +Software will be warranted for the remainder of the original warranty period +or thirty (30) days, whichever is longer. This warranty allocates risks of +product failure between Licensee and ID. ID's product pricing reflects this +allocation of risk and the limitations of liability contained in this +warranty. + + 4. NO OTHER WARRANTIES. ID DISCLAIMS ALL OTHER WARRANTIES, EITHER +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO, IMPLIED WARRANTIES OF +MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE WITH RESPECT TO THE +SOFTWARE AND THE ACCOMPANYING WRITTEN MATERIALS, IF ANY. THIS LIMITED +WARRANTY GIVES YOU SPECIFIC LEGAL RIGHTS. YOU MAY HAVE OTHERS WHICH VARY +FROM JURISDICTION TO JURISDICTION. ID DOES NOT WARRANT THAT THE OPERATION +OF THE SOFTWARE WILL BE UNINTERRUPTED, ERROR FREE OR MEET LICENSEE'S +SPECIFIC REQUIREMENTS. THE WARRANTY SET FORTH ABOVE IS IN LIEU OF ALL OTHER +EXPRESS WARRANTIES WHETHER ORAL OR WRITTEN. THE AGENTS, EMPLOYEES, +DISTRIBUTORS, AND DEALERS OF ID ARE NOT AUTHORIZED TO MAKE MODIFICATIONS TO +THIS WARRANTY, OR ADDITIONAL WARRANTIES ON BEHALF OF ID. ADDITIONAL +STATEMENTS SUCH AS DEALER ADVERTISING OR PRESENTATIONS, WHETHER ORAL OR +WRITTEN, DO NOT CONSTITUTE WARRANTIES BY ID AND SHOULD NOT BE RELIED UPON. + + 5. Exclusive Remedies. You agree that your exclusive remedy against +ID, its affiliates, contractors, suppliers, and agents for loss or damage +caused by any defect or failure in the Software regardless of the form of +action, whether in contract, tort, including negligence, strict liability or +otherwise, shall be the return of the retail purchase price paid, if any, or +replacement of the Software. This Agreement shall be construed in +accordance with and governed by the laws of the State of Texas. Copyright +and other proprietary matters will be governed by United States laws and +international treaties. IN ANY CASE, ID SHALL NOT BE LIABLE FOR LOSS OF +DATA, LOSS OF PROFITS, LOST SAVINGS, SPECIAL, INCIDENTAL, CONSEQUENTIAL, +INDIRECT OR OTHER SIMILAR DAMAGES ARISING FROM BREACH OF WARRANTY, BREACH OF +CONTRACT, NEGLIGENCE, OR OTHER LEGAL THEORY EVEN IF ID OR ITS AGENT HAS BEEN +ADVISED OF THE POSSIBILITY OF SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER +PARTY. Some jurisdictions do not allow the exclusion or limitation of +incidental or consequential damages, so the above limitation or exclusion +may not apply to you. + + 6. General Provisions. Neither this Agreement nor any part or portion +hereof shall be assigned or sublicensed, except as described herein. Should +any provision of this Agreement be held to be void, invalid, unenforceable or +illegal by a court, the validity and enforceability of the other provisions +shall not be affected thereby. If any provision is determined to be +unenforceable, you agree to a modification of such provision to provide for +enforcement of the provision's intent, to the extent permitted by applicable +law. Failure of a party to enforce any provision of this Agreement shall not +constitute or be construed as a waiver of such provision or of the right to +enforce such provision. If you fail to comply with any terms of this +Agreement, YOUR LICENSE IS AUTOMATICALLY TERMINATED. + + YOU ACKNOWLEDGE THAT YOU HAVE READ THIS AGREEMENT, YOU UNDERSTAND THIS +AGREEMENT, AND UNDERSTAND THAT BY CONTINUING THE INSTALLATION OF THE +SOFTWARE, BY LOADING OR RUNNING THE SOFTWARE, OR BY PLACING OR COPYING THE +SOFTWARE ONTO YOUR COMPUTER HARD DRIVE, YOU AGREE TO BE BOUND BY THIS +AGREEMENT'S TERMS AND CONDITIONS. YOU FURTHER AGREE THAT, EXCEPT FOR WRITTEN +SEPARATE AGREEMENTS BETWEEN ID AND YOU, THIS AGREEMENT IS A COMPLETE AND +EXCLUSIVE STATEMENT OF THE RIGHTS AND LIABILITIES OF THE PARTIES. THIS +AGREEMENT SUPERSEDES ALL PRIOR ORAL AGREEMENTS, PROPOSALS OR UNDERSTANDINGS, +AND ANY OTHER COMMUNICATIONS BETWEEN ID AND YOU RELATING TO THE SUBJECT +MATTER OF THIS AGREEMENT. + +The above license does not appear to grant distribution permission. Email +from John Carmack of ID Software provided this clarification: + +X-Sender: johnc@mail.idsoftware.com +X-Mailer: Windows Eudora Pro Version 3.0 (32) +Date: Sat, 23 Oct 1999 20:01:30 -0500 +To: Joe Drew +From: johnc@idsoftware.com (John Carmack) +Subject: Re: Doom shareware WAD license + +At 08:02 PM 10/23/99 -0400, you wrote: +>Can you give me a definite license on the doom 1 shareware wad? I find certain +>things that say "freely distribute" and others that say "get vendor's license" +>... All I need to have is a license so I can package it up for Debian. +>Thanks. +>Joe + +The DOOM shareware wad is freely distributable. No Quake data is freely +distributable. + +John Carmack + diff --git a/examples/hosts/doom/roms/doom.polkavm b/examples/hosts/doom/roms/doom.polkavm new file mode 100644 index 00000000..f6d00a0e Binary files /dev/null and b/examples/hosts/doom/roms/doom.polkavm differ diff --git a/examples/hosts/doom/roms/doom1.wad b/examples/hosts/doom/roms/doom1.wad new file mode 100644 index 00000000..1a58f662 Binary files /dev/null and b/examples/hosts/doom/roms/doom1.wad differ diff --git a/examples/hosts/doom/src/keys.rs b/examples/hosts/doom/src/keys.rs new file mode 100644 index 00000000..ae0c63a0 --- /dev/null +++ b/examples/hosts/doom/src/keys.rs @@ -0,0 +1,183 @@ +pub const RIGHTARROW: u8 = 0xae; +pub const LEFTARROW: u8 = 0xac; +pub const UPARROW: u8 = 0xad; +pub const DOWNARROW: u8 = 0xaf; +pub const USE: u8 = 0xa2; +pub const FIRE: u8 = 0xa3; +pub const ESCAPE: u8 = 27; +pub const ENTER: u8 = 13; +pub const TAB: u8 = 9; +pub const F1: u8 = 0x80 + 0x3b; +pub const F2: u8 = 0x80 + 0x3c; +pub const F3: u8 = 0x80 + 0x3d; +pub const F4: u8 = 0x80 + 0x3e; +pub const F5: u8 = 0x80 + 0x3f; +pub const F6: u8 = 0x80 + 0x40; +pub const F7: u8 = 0x80 + 0x41; +pub const F8: u8 = 0x80 + 0x42; +pub const F9: u8 = 0x80 + 0x43; +pub const F10: u8 = 0x80 + 0x44; +pub const F11: u8 = 0x80 + 0x57; +pub const F12: u8 = 0x80 + 0x58; + +pub const BACKSPACE: u8 = 0x7f; +pub const PAUSE: u8 = 0xff; + +pub const EQUALS: u8 = 0x3d; +pub const MINUS: u8 = 0x2d; + +pub const RSHIFT: u8 = 0x80 + 0x36; +pub const RCTRL: u8 = 0x80 + 0x1d; +pub const ALT: u8 = 0x80 + 0x38; + +pub const CAPSLOCK: u8 = 0x80 + 0x3a; +pub const SCRLCK: u8 = 0x80 + 0x46; +pub const PRTSCR: u8 = 0x80 + 0x59; + +pub const HOME: u8 = 0x80 + 0x47; +pub const END: u8 = 0x80 + 0x4f; +pub const PGUP: u8 = 0x80 + 0x49; +pub const PGDN: u8 = 0x80 + 0x51; +pub const INS: u8 = 0x80 + 0x52; +pub const DEL: u8 = 0x80 + 0x53; + +pub fn from_sdl2(key: sdl2::keyboard::Keycode) -> Option { + use sdl2::keyboard::Keycode as K; + Some(match key { + K::Right => RIGHTARROW, + K::Left => LEFTARROW, + K::Up => UPARROW, + K::Down => DOWNARROW, + K::Escape => ESCAPE, + K::Return => ENTER, + K::Tab => TAB, + K::F1 => F1, + K::F2 => F2, + K::F3 => F3, + K::F4 => F4, + K::F5 => F5, + K::F6 => F6, + K::F7 => F7, + K::F8 => F8, + K::F9 => F9, + K::F10 => F10, + K::F11 => F11, + K::F12 => F12, + K::Backspace => BACKSPACE, + K::Pause => PAUSE, + K::Equals => EQUALS, + K::Minus => MINUS, + K::LShift | K::RShift => RSHIFT, + K::RCtrl => RCTRL, + K::LAlt | K::RAlt => ALT, + K::CapsLock => CAPSLOCK, + K::ScrollLock => SCRLCK, + K::PrintScreen => PRTSCR, + K::Home => HOME, + K::End => END, + K::PageUp => PGUP, + K::PageDown => PGDN, + K::Insert => INS, + K::Delete => DEL, + + // QWERTY + K::W => UPARROW, + K::A => LEFTARROW, + K::S => DOWNARROW, + K::D => RIGHTARROW, + + // DVORAK + K::Comma => UPARROW, + K::O => DOWNARROW, + K::E => RIGHTARROW, + + //=> STRAFE_R, + K::Space => USE, + K::LCtrl => FIRE, + + // K::A => b'a', + K::B => b'b', + K::C => b'c', + // K::D => b'd', + // K::E => b'e', + K::F => b'f', + K::G => b'g', + K::H => b'h', + K::I => b'i', + K::J => b'j', + K::K => b'k', + K::L => b'l', + K::M => b'm', + K::N => b'n', + // K::O => b'o', + K::P => b'p', + K::Q => b'q', + K::R => b'r', + // K::S => b's', + K::T => b't', + K::U => b'u', + K::V => b'v', + // K::W => b'w', + K::X => b'x', + K::Y => b'y', + K::Z => b'z', + K::Num0 => b'0', + K::Num1 => b'1', + K::Num2 => b'2', + K::Num3 => b'3', + K::Num4 => b'4', + K::Num5 => b'5', + K::Num6 => b'6', + K::Num7 => b'7', + K::Num8 => b'8', + K::Num9 => b'9', + + K::Exclaim => b'!', + K::Quotedbl => b'"', + K::Hash => b'#', + K::Dollar => b'$', + K::Percent => b'%', + K::Ampersand => b'&', + K::Quote => b'\'', + K::LeftParen => b'(', + K::RightParen => b')', + K::Asterisk => b'*', + K::Plus => b'+', + // K::Comma => b',', + K::Period => b'.', + K::Slash => b'/', + K::Colon => b':', + K::Semicolon => b';', + K::Less => b'<', + K::Greater => b'>', + K::Question => b'?', + K::At => b'@', + K::LeftBracket => b'[', + K::Backslash => b'\\', + K::RightBracket => b']', + K::Caret => b'^', + K::Underscore => b'_', + K::Backquote => b'`', + + K::KpDivide => b'/', + K::KpMultiply => b'*', + K::KpMinus => b'-', + K::KpPlus => b'+', + K::KpEnter => ENTER, + K::KpPeriod => 0, + K::KpEquals => EQUALS, + + K::Kp0 => 0, + K::Kp1 => END, + K::Kp2 => DOWNARROW, + K::Kp3 => PGDN, + K::Kp4 => LEFTARROW, + K::Kp5 => b'5', + K::Kp6 => RIGHTARROW, + K::Kp7 => HOME, + K::Kp8 => UPARROW, + K::Kp9 => PGUP, + + _ => return None, + }) +} diff --git a/examples/hosts/doom/src/main.rs b/examples/hosts/doom/src/main.rs new file mode 100644 index 00000000..ff6426a1 --- /dev/null +++ b/examples/hosts/doom/src/main.rs @@ -0,0 +1,182 @@ +#![deny(unreachable_patterns)] + +use crate::vm::Vm; +use polkavm::ProgramBlob; +use sdl2::event::Event; +use sdl2::pixels::{Color, PixelFormatEnum}; +use sdl2::rect::Rect; +use std::rc::Rc; + +mod keys; +mod vm; + +fn main() { + env_logger::init(); + + let mut program_override = None; + let mut rom_override = None; + for arg in std::env::args().skip(1) { + let bytes = std::fs::read(arg).unwrap(); + if bytes.starts_with(b"PVM\0") { + program_override = Some(bytes); + } else { + rom_override = Some(bytes); + } + } + + const DOOM_PROGRAM: &[u8] = include_bytes!("../roms/doom.polkavm"); + const DOOM_ROM: &[u8] = include_bytes!("../roms/doom1.wad"); + + let blob = ProgramBlob::parse(program_override.as_deref().unwrap_or(DOOM_PROGRAM)).unwrap(); + let mut vm = Vm::from_blob(blob).unwrap(); + + vm.initialize(rom_override.as_deref().unwrap_or(DOOM_ROM)).unwrap(); + + let sdl_context = sdl2::init().unwrap(); + let video_context = sdl_context.video().unwrap(); + let audio_context = sdl_context.audio().unwrap(); + let mut event_pump = sdl_context.event_pump().unwrap(); + let window = video_context + .window("polkadoom", 640, 400) + .position_centered() + .resizable() + .build() + .unwrap(); + + let mut canvas = window.into_canvas().build().unwrap(); + let texture_creator = canvas.texture_creator(); + let mut texture = None; + + canvas.set_draw_color(Color::RGB(0, 0, 0)); + canvas.clear(); + canvas.present(); + + let audio_queue = audio_context + .open_queue::>( + None, + &sdl2::audio::AudioSpecDesired { + freq: Some(44100), + channels: Some(2), + samples: Some(512), + }, + ) + .unwrap(); + + let audio_queue = Rc::new(audio_queue); + audio_queue.resume(); + + let queue = audio_queue.clone(); + vm.set_on_audio_frame(move |buffer| { + let _ = queue.queue_audio(buffer); + }); + + let mut keys: [isize; 256] = [0; 256]; + loop { + loop { + while let Some(event) = event_pump.poll_event() { + let key_change = match event { + Event::Quit { .. } => { + std::process::exit(0); + } + Event::KeyDown { + keycode: Some(keycode), + repeat, + .. + } if !repeat => crate::keys::from_sdl2(keycode).map(|key| (key, true)), + Event::KeyUp { + keycode: Some(keycode), + repeat, + .. + } if !repeat => crate::keys::from_sdl2(keycode).map(|key| (key, false)), + Event::MouseButtonDown { + mouse_btn: sdl2::mouse::MouseButton::Left, + .. + } => Some((crate::keys::FIRE, true)), + Event::MouseButtonUp { + mouse_btn: sdl2::mouse::MouseButton::Left, + .. + } => Some((crate::keys::FIRE, false)), + Event::MouseButtonDown { + mouse_btn: sdl2::mouse::MouseButton::Right, + .. + } => Some((crate::keys::ALT, true)), + Event::MouseButtonUp { + mouse_btn: sdl2::mouse::MouseButton::Right, + .. + } => Some((crate::keys::ALT, false)), + Event::MouseButtonDown { + mouse_btn: sdl2::mouse::MouseButton::Middle, + .. + } => Some((crate::keys::USE, true)), + Event::MouseButtonUp { + mouse_btn: sdl2::mouse::MouseButton::Middle, + .. + } => Some((crate::keys::USE, false)), + _ => None, + }; + + if let Some((key, is_pressed)) = key_change { + let before = keys[key as usize] > 0; + if is_pressed { + keys[key as usize] += 1; + } else { + keys[key as usize] -= 1; + } + + let after = keys[key as usize] > 0; + if before != after { + vm.on_keychange(key, after).unwrap(); + } + } + } + + let samples_queued = audio_queue.size() / 4; + let samples_per_millisecond = 44100.0 / 1000.0; + let milliseconds_queued = samples_queued as f32 / samples_per_millisecond as f32; + if milliseconds_queued < 32.0 { + break; + } + + std::thread::sleep(core::time::Duration::from_millis(1)); + } + + let Ok((width, height, frame)) = vm.run_for_a_frame() else { + break; + }; + + canvas.clear(); + if !frame.is_empty() { + if let Some((_, texture_width, texture_height)) = texture { + if width != texture_width || height != texture_height { + texture = None; + } + } + + let (texture, tex_width, tex_height) = if let Some((ref mut texture, width, height)) = texture { + (texture, width, height) + } else { + let tex = texture_creator + .create_texture_streaming(PixelFormatEnum::ARGB8888, width, height) + .unwrap(); + + texture = Some((tex, width, height)); + (&mut texture.as_mut().unwrap().0, width, height) + }; + + let (display_width, display_height) = canvas.output_size().unwrap(); + let aspect = tex_width as f32 / tex_height as f32; + let out_width = core::cmp::min(display_width, (display_height as f32 * aspect) as u32); + + texture.update(None, frame, width as usize * 4).unwrap(); + canvas + .copy( + texture, + None, + Some(Rect::new(((display_width - out_width) / 2) as i32, 0, out_width, display_height)), + ) + .unwrap(); + } + + canvas.present(); + } +} diff --git a/examples/hosts/doom/src/vm.rs b/examples/hosts/doom/src/vm.rs new file mode 100644 index 00000000..d647413d --- /dev/null +++ b/examples/hosts/doom/src/vm.rs @@ -0,0 +1,147 @@ +use core::mem::MaybeUninit; +use polkavm::{Caller, Config, Engine, ExecutionError, Linker, Module, ProgramBlob, Trap, TypedFunc}; + +struct State { + rom: Vec, + frame: Vec, + frame_width: u32, + frame_height: u32, + audio_buffer: Vec, + #[allow(clippy::type_complexity)] + on_audio_frame: Option>, +} + +pub struct Vm { + state: State, + ext_initialize: TypedFunc, + ext_tick: TypedFunc, + ext_on_keychange: TypedFunc, +} + +impl Vm { + pub fn from_blob(blob: ProgramBlob) -> Result { + let config = Config::from_env()?; + let engine = Engine::new(&config)?; + let module = Module::from_blob(&engine, &blob)?; + let mut linker = Linker::new(&engine); + + linker.func_wrap( + "ext_output_video", + |caller: Caller, address: u32, width: u32, height: u32| -> Result<(), Trap> { + let (caller, state) = caller.split(); + let length = width * height * 4; + state.frame.clear(); + state.frame.reserve(length as usize); + caller.read_memory_into_slice(address, &mut state.frame.spare_capacity_mut()[..length as usize])?; + unsafe { + state.frame.set_len(length as usize); + } + state.frame_width = width; + state.frame_height = height; + + Ok(()) + }, + )?; + + linker.func_wrap( + "ext_output_audio", + |caller: Caller, address: u32, samples: u32| -> Result<(), Trap> { + let (caller, state) = caller.split(); + let Some(on_audio_frame) = state.on_audio_frame.as_mut() else { + return Ok(()); + }; + + state.audio_buffer.reserve(samples as usize * 2); + + { + let audio_buffer: &mut [MaybeUninit] = &mut state.audio_buffer.spare_capacity_mut()[..samples as usize * 2]; + let audio_buffer: &mut [MaybeUninit] = unsafe { + core::slice::from_raw_parts_mut(audio_buffer.as_mut_ptr().cast(), audio_buffer.len() * core::mem::size_of::()) + }; + caller.read_memory_into_slice(address, audio_buffer)?; + } + + unsafe { + let new_length = state.audio_buffer.len() + samples as usize * 2; + state.audio_buffer.set_len(new_length); + } + + on_audio_frame(&state.audio_buffer); + state.audio_buffer.clear(); + Ok(()) + }, + )?; + + linker.func_wrap("ext_rom_size", |caller: Caller| -> u32 { caller.data().rom.len() as u32 })?; + + linker.func_wrap( + "ext_rom_read", + |caller: Caller, pointer: u32, offset: u32, length: u32| -> Result<(), Trap> { + let (mut caller, state) = caller.split(); + let chunk = state + .rom + .get(offset as usize..offset as usize + length as usize) + .ok_or_else(Trap::default)?; + + caller.write_memory(pointer, chunk) + }, + )?; + + linker.func_wrap( + "ext_stdout", + |caller: Caller, buffer: u32, length: u32| -> Result { + if length == 0 { + return Ok(0); + } + + use std::io::Write; + let buffer = caller.read_memory_into_new_vec(buffer, length)?; + let stdout = std::io::stdout(); + let mut stdout = stdout.lock(); + if stdout.write_all(&buffer).is_ok() { + Ok(buffer.len() as i32) + } else { + Ok(-32) // EPIPE + } + }, + )?; + + let instance_pre = linker.instantiate_pre(&module)?; + let instance = instance_pre.instantiate()?; + let ext_initialize = instance.get_typed_func::<(), ()>("ext_initialize")?; + let ext_tick = instance.get_typed_func::<(), ()>("ext_tick")?; + let ext_on_keychange = instance.get_typed_func::<(u32, u32), ()>("ext_on_keychange")?; + + Ok(Self { + state: State { + rom: Default::default(), + frame: Default::default(), + frame_width: 0, + frame_height: 0, + audio_buffer: Default::default(), + on_audio_frame: None, + }, + ext_initialize, + ext_tick, + ext_on_keychange, + }) + } + + pub fn set_on_audio_frame(&mut self, callback: impl FnMut(&[i16]) + 'static) { + self.state.on_audio_frame = Some(Box::new(callback)); + } + + pub fn initialize(&mut self, rom: impl Into>) -> Result<(), ExecutionError> { + self.state.rom = rom.into(); + self.ext_initialize.call(&mut self.state, ()) + } + + pub fn run_for_a_frame(&mut self) -> Result<(u32, u32, &[u8]), ExecutionError> { + self.ext_tick.call(&mut self.state, ())?; + Ok((self.state.frame_width, self.state.frame_height, &self.state.frame)) + } + + pub fn on_keychange(&mut self, key: u8, is_pressed: bool) -> Result<(), ExecutionError> { + self.ext_on_keychange.call(&mut self.state, (key as u32, is_pressed as u32)) + } +} diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 743f7cd9..7eb23c42 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,2 +1,2 @@ [toolchain] -channel = "1.72.0" +channel = "1.72.1"