From 85ebaeae4719816c7302b1ffda9e7e1bd11f3524 Mon Sep 17 00:00:00 2001 From: "Panagiotis \"Ivory\" Vasilopoulos" Date: Tue, 25 Jun 2024 15:35:35 +0200 Subject: [PATCH] ASLR: Lay down the foundations - Move x86_64-related paging code to src/arch/x86_64/paging - Tests: x86_64-related paging tests should use a guest_address that is not 0 - Tests: Move them into separate files, use appropriate 'use' directives - Fix kernel memory loading - Add guest_address getter in UhyveVm - Change names of constants to clarify their purpose - Use u64 for arch::RAM_START instead of GuestPhysAddr - Remove pagetable_l0 from virt_to_phys function - Various `cargo fmt`-related changes We currently rely on guest_address in MmapMemory to calculate the offsets during the initialization of the VM and when converting virtual addresses to physical addresses. The latter case is intended to be temporary - we should read the value from the CR3 register at a later point. Given this change, the type of arch::RAM_START was changed from GuestPhysAddr to u64. Although this current revision does work with relocatable binaries, it is not making use of this functionality _just_ yet. Fixes #719. 
Co-authored-by: Jonathan --- src/arch/x86_64/mod.rs | 190 +++++++--------------------------- src/arch/x86_64/paging/mod.rs | 157 ++++++++++++++++++++++++++++ src/consts.rs | 19 ++-- src/hypercall.rs | 23 ++-- src/linux/gdb/breakpoints.rs | 11 +- src/linux/gdb/mod.rs | 6 +- src/linux/x86_64/kvm_cpu.rs | 34 ++++-- src/macos/x86_64/vcpu.rs | 23 ++-- src/vm.rs | 43 ++++++-- 9 files changed, 293 insertions(+), 213 deletions(-) create mode 100644 src/arch/x86_64/paging/mod.rs diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index ff2c82f56..f1b4ca4f3 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -1,3 +1,4 @@ +pub mod paging; pub mod registers; use core::arch::x86_64::_rdtsc as rdtsc; @@ -10,17 +11,14 @@ use log::{debug, warn}; use raw_cpuid::{CpuId, CpuIdReaderNative}; use thiserror::Error; use uhyve_interface::{GuestPhysAddr, GuestVirtAddr}; -use x86_64::{ - structures::paging::{ - page_table::{FrameError, PageTableEntry}, - Page, PageTable, PageTableFlags, PageTableIndex, Size2MiB, - }, - PhysAddr, +use x86_64::structures::paging::{ + page_table::{FrameError, PageTableEntry}, + PageTable, PageTableIndex, }; -use crate::{consts::*, mem::MmapMemory, paging::PagetableError}; +use crate::{consts::PML4_OFFSET, mem::MmapMemory, paging::PagetableError}; -pub const RAM_START: GuestPhysAddr = GuestPhysAddr::new(0x00); +pub const RAM_START: u64 = 0; const MHZ_TO_HZ: u64 = 1000000; const KHZ_TO_HZ: u64 = 1000; @@ -111,92 +109,10 @@ pub fn get_cpu_frequency_from_os() -> std::result::Result u64 { - ((base & 0xff000000u64) << (56 - 24)) - | ((flags & 0x0000f0ffu64) << 40) - | ((limit & 0x000f0000u64) << (48 - 16)) - | ((base & 0x00ffffffu64) << 16) - | (limit & 0x0000ffffu64) -} - -pub const MIN_PHYSMEM_SIZE: usize = BOOT_PDE.as_u64() as usize + 0x1000; - -/// Creates the pagetables and the GDT in the guest memory space. -/// -/// The memory slice must be larger than [`MIN_PHYSMEM_SIZE`]. 
-/// Also, the memory `mem` needs to be zeroed for [`PAGE_SIZE`] bytes at the -/// offsets [`BOOT_PML4`] and [`BOOT_PDPTE`], otherwise the integrity of the -/// pagetables and thus the integrity of the guest's memory is not ensured -pub fn initialize_pagetables(mem: &mut [u8]) { - assert!(mem.len() >= MIN_PHYSMEM_SIZE); - let mem_addr = std::ptr::addr_of_mut!(mem[0]); - - let (gdt_entry, pml4, pdpte, pde); - // Safety: - // We only operate in `mem`, which is plain bytes and we have ownership of - // these and it is asserted to be large enough. - unsafe { - gdt_entry = mem_addr - .add(BOOT_GDT.as_u64() as usize) - .cast::<[u64; 3]>() - .as_mut() - .unwrap(); - - pml4 = mem_addr - .add(BOOT_PML4.as_u64() as usize) - .cast::() - .as_mut() - .unwrap(); - pdpte = mem_addr - .add(BOOT_PDPTE.as_u64() as usize) - .cast::() - .as_mut() - .unwrap(); - pde = mem_addr - .add(BOOT_PDE.as_u64() as usize) - .cast::() - .as_mut() - .unwrap(); - - /* For simplicity we currently use 2MB pages and only a single - PML4/PDPTE/PDE. 
*/ - - // per default is the memory zeroed, which we allocate by the system - // call mmap, so the following is not necessary: - /*libc::memset(pml4 as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); - libc::memset(pdpte as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); - libc::memset(pde as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);*/ - } - // initialize GDT - gdt_entry[BOOT_GDT_NULL] = 0; - gdt_entry[BOOT_GDT_CODE] = create_gdt_entry(0xA09B, 0, 0xFFFFF); - gdt_entry[BOOT_GDT_DATA] = create_gdt_entry(0xC093, 0, 0xFFFFF); - - pml4[0].set_addr( - BOOT_PDPTE, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pml4[511].set_addr( - BOOT_PML4, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pdpte[0].set_addr(BOOT_PDE, PageTableFlags::PRESENT | PageTableFlags::WRITABLE); - - for i in 0..512 { - let addr = PhysAddr::new(i as u64 * Page::::SIZE); - pde[i].set_addr( - addr, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE, - ); - } -} - /// Converts a virtual address in the guest to a physical address in the guest pub fn virt_to_phys( addr: GuestVirtAddr, mem: &MmapMemory, - pagetable_l0: GuestPhysAddr, ) -> Result { /// Number of Offset bits of a virtual address for a 4 KiB page, which are shifted away to get its Page Frame Number (PFN). pub const PAGE_BITS: u64 = 12; @@ -205,7 +121,9 @@ pub fn virt_to_phys( pub const PAGE_MAP_BITS: usize = 9; let mut page_table = - unsafe { (mem.host_address(pagetable_l0).unwrap() as *mut PageTable).as_mut() }.unwrap(); + // TODO: Too cursed? 
+ unsafe { (mem.host_address(GuestPhysAddr::new(mem.guest_address.as_u64() + PML4_OFFSET)).unwrap() as *mut PageTable).as_mut() } + .unwrap(); let mut page_bits = 39; let mut entry = PageTableEntry::new(); @@ -232,14 +150,16 @@ pub fn virt_to_phys( Ok(entry.addr() + (addr.as_u64() & !((!0u64) << PAGE_BITS))) } -pub fn init_guest_mem(mem: &mut [u8]) { - // TODO: we should maybe return an error on failure (e.g., the memory is too small) - initialize_pagetables(mem); -} - #[cfg(test)] mod tests { + use x86_64::structures::paging::PageTableFlags; + use super::*; + use crate::{ + consts::{MIN_PHYSMEM_SIZE, PDE_OFFSET, PDPTE_OFFSET, PML4_OFFSET}, + x86_64::paging::initialize_pagetables, + }; + // test is derived from // https://github.com/gz/rust-cpuid/blob/master/examples/tsc_frequency.rs #[test] @@ -320,81 +240,43 @@ mod tests { } #[test] - fn test_pagetable_initialization() { - let mut mem: Vec = vec![0; MIN_PHYSMEM_SIZE]; - initialize_pagetables((&mut mem[0..MIN_PHYSMEM_SIZE]).try_into().unwrap()); - - // Test pagetable setup - let addr_pdpte = u64::from_le_bytes( - mem[(BOOT_PML4.as_u64() as usize)..(BOOT_PML4.as_u64() as usize + 8)] - .try_into() - .unwrap(), - ); - assert_eq!( - addr_pdpte, - BOOT_PDPTE.as_u64() | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() - ); - let addr_pde = u64::from_le_bytes( - mem[(BOOT_PDPTE.as_u64() as usize)..(BOOT_PDPTE.as_u64() as usize + 8)] - .try_into() - .unwrap(), + fn test_virt_to_phys() { + let guest_address = 0x11111000; + let mem = MmapMemory::new( + 0, + MIN_PHYSMEM_SIZE * 2, + GuestPhysAddr::new(guest_address), + true, + true, ); - assert_eq!( - addr_pde, - BOOT_PDE.as_u64() | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() + initialize_pagetables( + unsafe { mem.as_slice_mut() }.try_into().unwrap(), + guest_address, ); - for i in (0..4096).step_by(8) { - let addr = BOOT_PDE.as_u64() as usize + i; - let entry = u64::from_le_bytes(mem[addr..(addr + 8)].try_into().unwrap()); - assert!( - 
PageTableFlags::from_bits_truncate(entry) - .difference( - PageTableFlags::PRESENT - | PageTableFlags::WRITABLE - | PageTableFlags::HUGE_PAGE - ) - .is_empty(), - "Pagetable bits at {addr:#x} are incorrect" - ) - } - - // Test GDT - let gdt_results = [0x0, 0xAF9B000000FFFF, 0xCF93000000FFFF]; - for (i, res) in gdt_results.iter().enumerate() { - let gdt_addr = BOOT_GDT.as_u64() as usize + i * 8; - let gdt_entry = u64::from_le_bytes(mem[gdt_addr..gdt_addr + 8].try_into().unwrap()); - assert_eq!(*res, gdt_entry); - } - } - - #[test] - fn test_virt_to_phys() { - let mem = MmapMemory::new(0, MIN_PHYSMEM_SIZE * 2, GuestPhysAddr::new(0), true, true); - initialize_pagetables(unsafe { mem.as_slice_mut() }.try_into().unwrap()); - // Get the address of the first entry in PML4 (the address of the PML4 itself) let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFFFF000); - let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap(); - assert_eq!(p_addr, BOOT_PML4); + let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); + assert_eq!(p_addr, GuestPhysAddr::new(guest_address + PML4_OFFSET)); // The last entry on the PML4 is the address of the PML4 with flags let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFFFF000 | (4096 - 8)); - let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap(); + let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); assert_eq!( mem.read::(p_addr).unwrap(), - BOOT_PML4.as_u64() | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() + (guest_address + PML4_OFFSET) + | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() ); // the first entry on the 3rd level entry in the pagetables is the address of the boot pdpte let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFE00000); - let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap(); - assert_eq!(p_addr, BOOT_PDPTE); + let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); + assert_eq!(p_addr, GuestPhysAddr::new(guest_address + PDPTE_OFFSET)); // the first entry on the 2rd level entry in the 
pagetables is the address of the boot pde let virt_addr = GuestVirtAddr::new(0xFFFFFFFFC0000000); - let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap(); - assert_eq!(p_addr, BOOT_PDE); + let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); + assert_eq!(p_addr, GuestPhysAddr::new(guest_address + PDE_OFFSET)); // That address points to a huge page assert!( PageTableFlags::from_bits_truncate(mem.read::(p_addr).unwrap()).contains( diff --git a/src/arch/x86_64/paging/mod.rs b/src/arch/x86_64/paging/mod.rs new file mode 100644 index 000000000..421c15514 --- /dev/null +++ b/src/arch/x86_64/paging/mod.rs @@ -0,0 +1,157 @@ +use uhyve_interface::GuestPhysAddr; +use x86_64::{ + structures::paging::{Page, PageTable, PageTableFlags, Size2MiB}, + PhysAddr, +}; + +use crate::consts::*; + +/// Creates the pagetables and the GDT in the guest memory space. +/// +/// The memory slice must be larger than [`MIN_PHYSMEM_SIZE`]. +/// Also, the memory `mem` needs to be zeroed for [`PAGE_SIZE`] bytes at the +/// offsets [`BOOT_PML4`] and [`BOOT_PDPTE`], otherwise the integrity of the +/// pagetables and thus the integrity of the guest's memory is not ensured +pub fn initialize_pagetables(mem: &mut [u8], guest_address: u64) { + assert!(mem.len() >= MIN_PHYSMEM_SIZE); + let mem_addr = std::ptr::addr_of_mut!(mem[0]); + + let (gdt_entry, pml4, pdpte, pde); + // Safety: + // We only operate in `mem`, which is plain bytes and we have ownership of + // these and it is asserted to be large enough. + unsafe { + gdt_entry = mem_addr + .add(GDT_OFFSET as usize) + .cast::<[u64; 3]>() + .as_mut() + .unwrap(); + + pml4 = mem_addr + .add(PML4_OFFSET as usize) + .cast::() + .as_mut() + .unwrap(); + pdpte = mem_addr + .add(PDPTE_OFFSET as usize) + .cast::() + .as_mut() + .unwrap(); + pde = mem_addr + .add(PDE_OFFSET as usize) + .cast::() + .as_mut() + .unwrap(); + + /* For simplicity we currently use 2MB pages and only a single + PML4/PDPTE/PDE. 
*/ + + // per default is the memory zeroed, which we allocate by the system + // call mmap, so the following is not necessary: + /*libc::memset(pml4 as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); + libc::memset(pdpte as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); + libc::memset(pde as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);*/ + } + // initialize GDT + gdt_entry[BOOT_GDT_NULL] = 0; + gdt_entry[BOOT_GDT_CODE] = create_gdt_entry(0xA09B, 0, 0xFFFFF); + gdt_entry[BOOT_GDT_DATA] = create_gdt_entry(0xC093, 0, 0xFFFFF); + + pml4[0].set_addr( + GuestPhysAddr::new(guest_address + PDPTE_OFFSET), + PageTableFlags::PRESENT | PageTableFlags::WRITABLE, + ); + pml4[511].set_addr( + GuestPhysAddr::new(guest_address + PML4_OFFSET), + PageTableFlags::PRESENT | PageTableFlags::WRITABLE, + ); + pdpte[0].set_addr( + GuestPhysAddr::new(guest_address + PDE_OFFSET), + PageTableFlags::PRESENT | PageTableFlags::WRITABLE, + ); + + for i in 0..512 { + let addr = PhysAddr::new(i as u64 * Page::::SIZE); + pde[i].set_addr( + addr, + PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE, + ); + } +} + +pub fn init_guest_mem(mem: &mut [u8], guest_address: u64) { + // TODO: we should maybe return an error on failure (e.g., the memory is too small) + initialize_pagetables(mem, guest_address); +} + +// Constructor for a conventional segment GDT (or LDT) entry +pub fn create_gdt_entry(flags: u64, base: u64, limit: u64) -> u64 { + ((base & 0xff000000u64) << (56 - 24)) + | ((flags & 0x0000f0ffu64) << 40) + | ((limit & 0x000f0000u64) << (48 - 16)) + | ((base & 0x00ffffffu64) << 16) + | (limit & 0x0000ffffu64) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::consts::{GDT_OFFSET, PDE_OFFSET, PDPTE_OFFSET, PML4_OFFSET}; + + #[test] + fn test_pagetable_initialization() { + let guest_address = 0x15000; + + let mut mem: Vec = vec![0; MIN_PHYSMEM_SIZE]; + // This will return a pagetable setup that we will check. 
+ initialize_pagetables( + (&mut mem[0..MIN_PHYSMEM_SIZE]).try_into().unwrap(), + guest_address, + ); + + // Check PDPTE address + let addr_pdpte = u64::from_le_bytes( + mem[(PML4_OFFSET as usize)..(PML4_OFFSET as usize + 8)] + .try_into() + .unwrap(), + ); + assert_eq!( + addr_pdpte - guest_address, + PDPTE_OFFSET | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() + ); + + // Check PDE + let addr_pde = u64::from_le_bytes( + mem[(PDPTE_OFFSET as usize)..(PDPTE_OFFSET as usize + 8)] + .try_into() + .unwrap(), + ); + assert_eq!( + addr_pde - guest_address, + PDE_OFFSET | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() + ); + + // Check PDE's pagetable bits + for i in (0..4096).step_by(8) { + let pde_addr = (PDE_OFFSET) as usize + i; + let entry = u64::from_le_bytes(mem[pde_addr..(pde_addr + 8)].try_into().unwrap()); + assert!( + PageTableFlags::from_bits_truncate(entry) + .difference( + PageTableFlags::PRESENT + | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE + ) + .is_empty(), + "Pagetable bits at {pde_addr:#x} are incorrect" + ) + } + + // Test GDT + let gdt_results = [0x0, 0xAF9B000000FFFF, 0xCF93000000FFFF]; + for (i, res) in gdt_results.iter().enumerate() { + let gdt_addr = GDT_OFFSET as usize + i * 8; + let gdt_entry = u64::from_le_bytes(mem[gdt_addr..gdt_addr + 8].try_into().unwrap()); + assert_eq!(*res, gdt_entry); + } + } +} diff --git a/src/consts.rs b/src/consts.rs index b33727f32..5a139f667 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -1,19 +1,22 @@ -use uhyve_interface::GuestPhysAddr; - pub const PAGE_SIZE: usize = 0x1000; pub const GDT_KERNEL_CODE: u16 = 1; pub const GDT_KERNEL_DATA: u16 = 2; pub const APIC_DEFAULT_BASE: u64 = 0xfee00000; -pub const BOOT_GDT: GuestPhysAddr = GuestPhysAddr::new(0x1000); + pub const BOOT_GDT_NULL: usize = 0; pub const BOOT_GDT_CODE: usize = 1; pub const BOOT_GDT_DATA: usize = 2; pub const BOOT_GDT_MAX: usize = 3; -pub const BOOT_PML4: GuestPhysAddr = GuestPhysAddr::new(0x10000); -pub 
const BOOT_PGT: GuestPhysAddr = BOOT_PML4; -pub const BOOT_PDPTE: GuestPhysAddr = GuestPhysAddr::new(0x11000); -pub const BOOT_PDE: GuestPhysAddr = GuestPhysAddr::new(0x12000); -pub const BOOT_INFO_ADDR: GuestPhysAddr = GuestPhysAddr::new(0x9000); + +// guest_address + OFFSET +pub const GDT_OFFSET: u64 = 0x1000; +pub const PML4_OFFSET: u64 = 0x10000; +pub const PGT_OFFSET: u64 = 0x10000; +pub const PDPTE_OFFSET: u64 = 0x11000; +pub const PDE_OFFSET: u64 = 0x12000; +pub const BOOT_INFO_ADDR_OFFSET: u64 = 0x9000; +pub const MIN_PHYSMEM_SIZE: usize = 0x13000; + pub const EFER_SCE: u64 = 1; /* System Call Extensions */ pub const EFER_LME: u64 = 1 << 8; /* Long mode enable */ pub const EFER_LMA: u64 = 1 << 10; /* Long mode active (read-only) */ diff --git a/src/hypercall.rs b/src/hypercall.rs index 0205419cc..47921ec64 100644 --- a/src/hypercall.rs +++ b/src/hypercall.rs @@ -7,7 +7,6 @@ use std::{ use uhyve_interface::{parameters::*, GuestPhysAddr, Hypercall, HypercallAddress, MAX_ARGC_ENVC}; use crate::{ - consts::BOOT_PML4, mem::{MemoryError, MmapMemory}, virt_to_phys, }; @@ -102,7 +101,7 @@ pub fn read(mem: &MmapMemory, sysread: &mut ReadPrams) { unsafe { let bytes_read = libc::read( sysread.fd, - mem.host_address(virt_to_phys(sysread.buf, mem, BOOT_PML4).unwrap()) + mem.host_address(virt_to_phys(sysread.buf, mem).unwrap()) .unwrap() as *mut libc::c_void, sysread.len, ); @@ -121,17 +120,15 @@ pub fn write(mem: &MmapMemory, syswrite: &WriteParams) -> io::Result<()> { unsafe { let step = libc::write( syswrite.fd, - mem.host_address( - virt_to_phys(syswrite.buf + bytes_written as u64, mem, BOOT_PML4).unwrap(), - ) - .map_err(|e| match e { - MemoryError::BoundsViolation => { - unreachable!("Bounds violation after host_address function") - } - MemoryError::WrongMemoryError => { - Error::new(ErrorKind::AddrNotAvailable, e.to_string()) - } - })? 
as *const libc::c_void, + mem.host_address(virt_to_phys(syswrite.buf + bytes_written as u64, mem).unwrap()) + .map_err(|e| match e { + MemoryError::BoundsViolation => { + unreachable!("Bounds violation after host_address function") + } + MemoryError::WrongMemoryError => { + Error::new(ErrorKind::AddrNotAvailable, e.to_string()) + } + })? as *const libc::c_void, syswrite.len - bytes_written, ); if step >= 0 { diff --git a/src/linux/gdb/breakpoints.rs b/src/linux/gdb/breakpoints.rs index 454f9cdf7..1d84f999b 100644 --- a/src/linux/gdb/breakpoints.rs +++ b/src/linux/gdb/breakpoints.rs @@ -4,10 +4,7 @@ use gdbstub::target::{self, ext::breakpoints::WatchKind, TargetResult}; use uhyve_interface::GuestVirtAddr; use super::GdbUhyve; -use crate::{ - arch::x86_64::{registers, virt_to_phys}, - consts::BOOT_PML4, -}; +use crate::arch::x86_64::{registers, virt_to_phys}; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct SwBreakpoint { addr: u64, @@ -55,8 +52,7 @@ impl target::ext::breakpoints::SwBreakpoint for GdbUhyve { // Safety: mem is not altered during the lifetime of `instructions` let instructions = unsafe { self.vm.mem.slice_at_mut( - virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem, BOOT_PML4) - .map_err(|_err| ())?, + virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem).map_err(|_err| ())?, kind, ) } @@ -76,8 +72,7 @@ impl target::ext::breakpoints::SwBreakpoint for GdbUhyve { // Safety: mem is not altered during the lifetime of `instructions` let instructions = unsafe { self.vm.mem.slice_at_mut( - virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem, BOOT_PML4) - .map_err(|_err| ())?, + virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem).map_err(|_err| ())?, kind, ) } diff --git a/src/linux/gdb/mod.rs b/src/linux/gdb/mod.rs index 5dd525f0a..29f08e3ab 100644 --- a/src/linux/gdb/mod.rs +++ b/src/linux/gdb/mod.rs @@ -30,7 +30,6 @@ use self::breakpoints::SwBreakpoints; use super::HypervisorError; use crate::{ arch::x86_64::{registers::debug::HwBreakpoints, 
virt_to_phys}, - consts::BOOT_PML4, linux::{x86_64::kvm_cpu::KvmCpu, KickSignal}, vcpu::{VcpuStopReason, VirtualCPU}, vm::UhyveVm, @@ -131,7 +130,7 @@ impl SingleThreadBase for GdbUhyve { // Safety: mem is copied to data before mem can be modified. let src = unsafe { self.vm.mem.slice_at( - virt_to_phys(guest_addr, &self.vm.mem, BOOT_PML4).map_err(|_err| ())?, + virt_to_phys(guest_addr, &self.vm.mem).map_err(|_err| ())?, data.len(), ) } @@ -144,8 +143,7 @@ impl SingleThreadBase for GdbUhyve { // Safety: self.vm.mem is not altered during the lifetime of mem. let mem = unsafe { self.vm.mem.slice_at_mut( - virt_to_phys(GuestVirtAddr::new(start_addr), &self.vm.mem, BOOT_PML4) - .map_err(|_err| ())?, + virt_to_phys(GuestVirtAddr::new(start_addr), &self.vm.mem).map_err(|_err| ())?, data.len(), ) } diff --git a/src/linux/x86_64/kvm_cpu.rs b/src/linux/x86_64/kvm_cpu.rs index 8ef037348..916f080a2 100644 --- a/src/linux/x86_64/kvm_cpu.rs +++ b/src/linux/x86_64/kvm_cpu.rs @@ -11,10 +11,12 @@ use crate::{ hypercall, linux::KVM, mem::MmapMemory, + // TODO: Clean this up. 
vcpu::{VcpuStopReason, VirtualCPU}, virtio::*, vm::UhyveVm, - HypervisorError, HypervisorResult, + HypervisorError, + HypervisorResult, }; const CPUID_EXT_HYPERVISOR: u32 = 1 << 31; @@ -229,6 +231,7 @@ impl KvmCpu { &self, entry_point: u64, stack_address: u64, + guest_address: u64, cpu_id: u32, ) -> Result<(), kvm_ioctls::Error> { //debug!("Setup long mode"); @@ -241,7 +244,7 @@ impl KvmCpu { | Cr0Flags::PAGING; sregs.cr0 = cr0.bits(); - sregs.cr3 = BOOT_PML4.as_u64(); + sregs.cr3 = guest_address + PML4_OFFSET; let cr4 = Cr4Flags::PHYSICAL_ADDRESS_EXTENSION; sregs.cr4 = cr4.bits(); @@ -272,7 +275,7 @@ impl KvmCpu { sregs.ss = seg; //sregs.fs = seg; //sregs.gs = seg; - sregs.gdt.base = BOOT_GDT.as_u64(); + sregs.gdt.base = guest_address + GDT_OFFSET; sregs.gdt.limit = ((std::mem::size_of::() * BOOT_GDT_MAX) - 1) as u16; self.vcpu.set_sregs(&sregs)?; @@ -280,7 +283,7 @@ impl KvmCpu { let mut regs = self.vcpu.get_regs()?; regs.rflags = 2; regs.rip = entry_point; - regs.rdi = BOOT_INFO_ADDR.as_u64(); + regs.rdi = guest_address + BOOT_INFO_ADDR_OFFSET; regs.rsi = cpu_id.into(); regs.rsp = stack_address; @@ -305,8 +308,14 @@ impl KvmCpu { &mut self.vcpu } - fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()> { - self.setup_long_mode(entry_point, stack_address, cpu_id)?; + fn init( + &mut self, + entry_point: u64, + stack_address: u64, + guest_address: u64, + cpu_id: u32, + ) -> HypervisorResult<()> { + self.setup_long_mode(entry_point, stack_address, guest_address, cpu_id)?; self.setup_cpuid()?; // be sure that the multiprocessor is runable @@ -335,7 +344,12 @@ impl VirtualCPU for KvmCpu { parent_vm: parent_vm.clone(), pci_addr: None, }; - kvcpu.init(parent_vm.get_entry_point(), parent_vm.stack_address(), id)?; + kvcpu.init( + parent_vm.get_entry_point(), + parent_vm.stack_address(), + parent_vm.guest_address(), + id, + )?; Ok(kvcpu) } @@ -395,6 +409,12 @@ impl VirtualCPU for KvmCpu { warn!("guest read from unknown I/O port 
{port:#x}"); } }, + VcpuExit::MmioRead(port, addr) => { + debug!("MmioRead: {:?}", VcpuExit::MmioRead(port, addr)); + } + VcpuExit::MmioWrite(port, addr) => { + debug!("MmioWrite: {:?}", VcpuExit::MmioWrite(port, addr)); + } VcpuExit::IoOut(port, addr) => { let data_addr = GuestPhysAddr::new(unsafe { (*(addr.as_ptr() as *const u32)) as u64 }); diff --git a/src/macos/x86_64/vcpu.rs b/src/macos/x86_64/vcpu.rs index 1cfaf230b..ca76ad7b4 100644 --- a/src/macos/x86_64/vcpu.rs +++ b/src/macos/x86_64/vcpu.rs @@ -130,18 +130,17 @@ lazy_static! { let cap: u64 = { read_vmx_cap(&xhypervisor::VMXCap::PINBASED).unwrap() }; cap2ctrl(cap, PIN_BASED_INTR | PIN_BASED_NMI | PIN_BASED_VIRTUAL_NMI) }; - static ref CAP_PROCBASED: u64 = { - let cap: u64 = { read_vmx_cap(&xhypervisor::VMXCap::PROCBASED).unwrap() }; - cap2ctrl( - cap, - CPU_BASED_SECONDARY_CTLS - | CPU_BASED_MWAIT - | CPU_BASED_MSR_BITMAPS - | CPU_BASED_MONITOR - | CPU_BASED_TSC_OFFSET - | CPU_BASED_TPR_SHADOW, - ) - }; + static ref CAP_PROCBASED: u64 = + { + let cap: u64 = { read_vmx_cap(&xhypervisor::VMXCap::PROCBASED).unwrap() }; + cap2ctrl( + cap, + CPU_BASED_SECONDARY_CTLS + | CPU_BASED_MWAIT | CPU_BASED_MSR_BITMAPS + | CPU_BASED_MONITOR | CPU_BASED_TSC_OFFSET + | CPU_BASED_TPR_SHADOW, + ) + }; static ref CAP_PROCBASED2: u64 = { let cap: u64 = { read_vmx_cap(&xhypervisor::VMXCap::PROCBASED2).unwrap() }; cap2ctrl(cap, CPU_BASED2_RDTSCP | CPU_BASED2_APIC_REG_VIRT) diff --git a/src/vm.rs b/src/vm.rs index e058cbb78..442e8aeaf 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -15,6 +15,7 @@ use hermit_entry::{ }; use log::{error, warn}; use thiserror::Error; +use uhyve_interface::GuestPhysAddr; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::{ @@ -24,7 +25,7 @@ use crate::arch::x86_64::{ use crate::linux::x86_64::kvm_cpu::initialize_kvm; use crate::{ arch, consts::*, mem::MmapMemory, os::HypervisorError, params::Params, vcpu::VirtualCPU, - virtio::*, + virtio::*, x86_64::paging::init_guest_mem, }; pub type 
HypervisorResult = Result; @@ -75,6 +76,7 @@ pub struct UhyveVm { offset: u64, entry_point: u64, stack_address: u64, + guest_address: u64, pub mem: Arc, num_cpus: u32, path: PathBuf, @@ -90,10 +92,26 @@ impl UhyveVm { pub fn new(kernel_path: PathBuf, params: Params) -> HypervisorResult> { let memory_size = params.memory_size.get(); + // TODO: Use random address + let guest_address = arch::RAM_START; + #[cfg(target_os = "linux")] - let mem = MmapMemory::new(0, memory_size, arch::RAM_START, params.thp, params.ksm); + let mem = MmapMemory::new( + 0, + memory_size, + GuestPhysAddr::new(guest_address), + params.thp, + params.ksm, + ); + #[cfg(not(target_os = "linux"))] - let mem = MmapMemory::new(0, memory_size, arch::RAM_START, false, false); + let mem = MmapMemory::new( + 0, + memory_size, + GuestPhysAddr::new(guest_address), + false, + false, + ); // create virtio interface // TODO: Remove allow once fixed: @@ -119,6 +137,7 @@ impl UhyveVm { offset: 0, entry_point: 0, stack_address: 0, + guest_address, mem: mem.into(), num_cpus: cpu_count, path: kernel_path, @@ -152,6 +171,10 @@ impl UhyveVm { self.stack_address } + pub fn guest_address(&self) -> u64 { + self.guest_address + } + /// Returns the number of cores for the vm. pub fn num_cpus(&self) -> u32 { self.num_cpus @@ -168,10 +191,11 @@ impl UhyveVm { /// Initialize the page tables for the guest fn init_guest_mem(&mut self) { debug!("Initialize guest memory"); - crate::arch::init_guest_mem( + init_guest_mem( unsafe { self.mem.as_slice_mut() } // slice only lives during this fn call .try_into() .expect("Guest memory is not large enough for pagetables"), + self.guest_address, ); } @@ -179,8 +203,13 @@ impl UhyveVm { let elf = fs::read(self.kernel_path())?; let object = KernelObject::parse(&elf).map_err(LoadKernelError::ParseKernelError)?; + // The offset of the kernel in the Memory. 
Must be larger than BOOT_INFO_OFFSET + KERNEL_STACK_SIZE + let kernel_offset = 0x20_000_usize; // TODO: should be a random start address, if we have a relocatable executable - let kernel_start_address = object.start_addr().unwrap_or(0x400000) as usize; + let kernel_start_address = object + .start_addr() + .unwrap_or(self.mem.guest_address.as_u64() + kernel_offset as u64) + as usize; let kernel_end_address = kernel_start_address + object.mem_size(); self.offset = kernel_start_address as u64; @@ -194,7 +223,7 @@ impl UhyveVm { } = object.load_kernel( // Safety: Slice only lives during this fn call, so no aliasing happens &mut unsafe { self.mem.as_slice_uninit_mut() } - [kernel_start_address..kernel_end_address], + [kernel_offset..object.mem_size() + kernel_offset], kernel_start_address as u64, ); self.entry_point = entry_point; @@ -219,7 +248,7 @@ impl UhyveVm { }; unsafe { let raw_boot_info_ptr = - self.mem.host_address.add(BOOT_INFO_ADDR.as_u64() as usize) as *mut RawBootInfo; + self.mem.host_address.add(BOOT_INFO_ADDR_OFFSET as usize) as *mut RawBootInfo; *raw_boot_info_ptr = RawBootInfo::from(boot_info); self.boot_info = raw_boot_info_ptr; }