From b8fbb616cfb7ecebf9792145ff6b520e5988fa97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Tue, 27 Aug 2024 13:46:30 -0700 Subject: [PATCH] Use virtiofsd for sharing file system data with host MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch over to using virtiofsd for sharing file system data with the host. virtiofs is a file system designed for the needs of virtual machines and environments. That is in contrast to 9P fs, which we currently use for sharing data with the host, which is first and foremost a network file system. 9P is problematic if for no other reason that it lacks proper support for usage of the "open-unlink-fstat idiom", in which files are unlinked and later referenced via file descriptor (see #83). virtiofs does not have this problem. This change replaces usage of 9P with that of virtiofs. In order to work, virtiofsd needs a user space server. Contrary to what is the case for 9P, it is not currently integrated into Qemu itself and so we have to manage it separately (and require the user to install it). I benchmarked both the current master as well as this version with a bare-bones custom kernel: Benchmark 1: target/release/vmtest -k bzImage-9p 'echo test' Time (mean ± σ): 1.316 s ± 0.087 s [User: 0.462 s, System: 1.104 s] Range (min … max): 1.232 s … 1.463 s 10 runs Benchmark 1: target/release/vmtest -k bzImage-virtiofsd 'echo test' Time (mean ± σ): 1.244 s ± 0.011 s [User: 0.307 s, System: 0.358 s] Range (min … max): 1.227 s … 1.260 s 10 runs So it seems there is a ~0.7s speed up, on average (and significantly less system time being used). This is great, but I suspect a more pronounced speed advantage will be visible when working with large files, in which virtiofs is said to significantly outperform 9P (typically >2x from what I understand, but I have not done any benchmarks of that nature). A few other notes: - we solely rely on guest level read-only mounts to enforce read-only state. The virtiofsd recommended way is to use read-only bind mounts [0], but doing so would require root. - we are not using DAX, because it still is still incomplete and apparently requires building Qemu (?) from source. In any event, it should not change anything functionally and be solely a performance improvement. - interestingly, there may be the option of just consuming the virtiofsd crate as a library and not require any shelling out. That would be *much* nicer, but the current APIs make this somewhat cumbersome. I'd think we'd pretty much have to reimplement their entire main() functionality [1]. I consider this way out of scope for this first version. I have adjusted the configs, but because I don't have Docker handy I can't really create those kernel. CI seems incapable of producing the artifacts without doing a fully-blown release dance. No idea what empty is about, really. I suspect the test failures we see are because it lacks support? Some additional resources worth keeping around: - https://virtio-fs.gitlab.io/howto-boot.html - https://virtio-fs.gitlab.io/howto-qemu.html [0] https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/README.md?ref_type=heads#faq [1] https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/src/main.rs?ref_type=heads#L1242 Closes: #16 Closes: #83 Signed-off-by: Daniel Müller --- .github/workflows/rust.yml | 5 +- Cargo.toml | 2 +- README.md | 10 +-- src/lib.rs | 1 + src/output.rs | 5 ++ src/qemu.rs | 130 ++++++++++++++++++++-------------- src/ui.rs | 10 +++ src/virtiofsd.rs | 141 +++++++++++++++++++++++++++++++++++++ tests/kernels/archlinux | 4 +- tests/kernels/fedora38 | 4 +- tests/test.rs | 2 +- 11 files changed, 248 insertions(+), 66 deletions(-) create mode 100644 src/virtiofsd.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 8e7787b..ca2467d 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -16,7 +16,8 @@ env: jobs: build-test: - runs-on: ubuntu-latest + # ubuntu-24.04 is the earliest image with a `virtiofsd` package. + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v3 @@ -36,7 +37,7 @@ jobs: run: | sudo apt-get update # Virtualization deps - sudo apt-get install -y qemu-system-x86-64 qemu-guest-agent qemu-utils ovmf + sudo apt-get install -y qemu-system-x86-64 qemu-guest-agent qemu-utils ovmf virtiofsd - name: Cache test assets uses: actions/cache@v3 diff --git a/Cargo.toml b/Cargo.toml index bb76c7c..ecd3996 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ readme = "README.md" version = "0.14.0" edition = "2021" license = "Apache-2.0" -rust-version = "1.70.0" +rust-version = "1.80.0" [dependencies] anyhow = "1.0.66" diff --git a/README.md b/README.md index 5f34157..765442b 100644 --- a/README.md +++ b/README.md @@ -26,14 +26,15 @@ The following are required dependencies, grouped by location: Host machine: -* [`qemu`](https://pkgs.org/download/qemu) +* [`qemu`](https://pkgs.org/download/qemu) (version 5.9 or higher) * [`qemu-guest-agent`](https://pkgs.org/search/?q=qemu-guest-agent) * [`OVMF`](https://pkgs.org/download/ovmf) +* [`virtiofsd`](https://gitlab.com/virtio-fs/virtiofsd) Virtual machine image: * `qemu-guest-agent` -* Kernel 9p filesystem support, either compiled in or as modules (see kernel +* Kernel `virtiofs` support, either compiled in or as modules (see kernel dependencies) * Most (if not all) distros already ship support as modules or better @@ -42,9 +43,8 @@ Kernel: * `CONFIG_VIRTIO=y` * `CONFIG_VIRTIO_PCI=y` * `CONFIG_VIRTIO_CONSOLE=y` -* `CONFIG_NET_9P=y` -* `CONFIG_NET_9P_VIRTIO=y` -* `CONFIG_9P_FS=y` +* `CONFIG_FUSE_FS=y` +* `CONFIG_VIRTIO_FS=y` Note the virtual machine image dependencies are only required if you're using the `image` target parameter. Likewise, the same applies for kernel diff --git a/src/lib.rs b/src/lib.rs index 7c7de7c..93988c9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,3 +17,4 @@ pub use crate::vmtest::*; mod qemu; mod qga; mod util; +mod virtiofsd; diff --git a/src/output.rs b/src/output.rs index 440e242..e8cd0c2 100644 --- a/src/output.rs +++ b/src/output.rs @@ -10,6 +10,11 @@ use anyhow::Result; /// Receivers should treat failures as terminal and not expect any more /// updates. pub enum Output { + /// On-host initialization starts + InitializeStart, + /// Initialization finished with provided with provided result + InitializeEnd(Result<()>), + /// VM boot begins BootStart, /// Output related to VM boot diff --git a/src/qemu.rs b/src/qemu.rs index f19a4ed..b62c77e 100644 --- a/src/qemu.rs +++ b/src/qemu.rs @@ -27,18 +27,17 @@ use tinytemplate::{format_unescaped, TinyTemplate}; use crate::output::Output; use crate::qga::QgaWrapper; use crate::util::gen_sock; +use crate::virtiofsd::Virtiofsd; use crate::{Mount, Target, VMConfig}; const INIT_TEMPLATE: &str = include_str!("init/init.sh.template"); const COMMAND_TEMPLATE: &str = include_str!("init/command.template"); -// Needs to be `/dev/root` for kernel to "find" the 9pfs as rootfs -const ROOTFS_9P_FS_MOUNT_TAG: &str = "/dev/root"; -const SHARED_9P_FS_MOUNT_TAG: &str = "vmtest-shared"; +const ROOT_FS_MOUNT_TAG: &str = "rootfs"; +const SHARED_FS_MOUNT_TAG: &str = "vmtest-shared"; const COMMAND_OUTPUT_PORT_NAME: &str = "org.qemu.virtio_serial.0"; const MAGIC_INTERACTIVE_COMMAND: &str = "-"; -const SHARED_9P_FS_MOUNT_PATH: &str = "/mnt/vmtest"; -const MOUNT_OPTS_9P_FS: &str = "trans=virtio,cache=mmap,msize=1048576"; +const SHARED_FS_MOUNT_PATH: &str = "/mnt/vmtest"; const OVMF_PATHS: &[&str] = &[ // Fedora "/usr/share/edk2/ovmf/OVMF_CODE.fd", @@ -55,6 +54,8 @@ type QmpUnixStream = qapi::Stream, UnixStream>; /// Represents a single QEMU instance pub struct Qemu { process: Command, + /// `virtiofsd` instances for each of the mounts in use. + virtiofsds: Vec, qga_sock: PathBuf, qmp_sock: PathBuf, command: String, @@ -241,6 +242,18 @@ fn guest_agent_args(sock: &Path) -> Vec { args } +/// Generate general arguments necessary for working with `virtiofs`. +fn virtiofs_general_args(vm: &VMConfig) -> Vec { + let mut args: Vec = Vec::new(); + + args.push("-object".into()); + args.push(format!("memory-backend-memfd,id=mem,share=on,size={}", vm.memory.as_str()).into()); + args.push("-numa".into()); + args.push("node,memdev=mem".into()); + + args +} + /// Generate arguments for full KVM virtualization if host supports it fn kvm_args(arch: &str) -> Vec<&'static str> { let mut args = Vec::new(); @@ -291,30 +304,17 @@ fn machine_protocol_args(sock: &Path) -> Vec { args } -/// Generate arguments for setting up 9p FS server on host +/// Generate per-file-system arguments necessary for working with `virtiofs`. /// -/// `id` is the ID for the FS export (currently unused AFAICT) +/// `id` is the ID for the FS export /// `mount_tag` is used inside guest to find the export -fn plan9_fs_args(host_shared: &Path, id: &str, mount_tag: &str, ro: bool) -> Vec { +fn virtiofs_per_fs_args(virtiofsd: &Virtiofsd, id: &str, mount_tag: &str) -> Vec { let mut args: Vec = Vec::new(); - args.push("-virtfs".into()); - - let mut arg = OsString::new(); - arg.push(format!("local,id={id},path=")); - arg.push(if host_shared.as_os_str().is_empty() { - // This case occurs when the config file path is just "vmtest.toml" - Path::new(".") - } else { - host_shared - }); - arg.push(format!( - ",mount_tag={mount_tag},security_model=none,multidevs=remap" - )); - if ro { - arg.push(",readonly=on") - } - args.push(arg); + args.push("-chardev".into()); + args.push(format!("socket,id={id},path={}", virtiofsd.socket_path().display()).into()); + args.push("-device".into()); + args.push(format!("vhost-user-fs-pci,queue-size=1024,chardev={id},tag={mount_tag}").into()); args } @@ -371,9 +371,9 @@ fn kernel_args( // The guest kernel command line args let mut cmdline: Vec = Vec::new(); - // Tell kernel the rootfs is 9p - cmdline.push("rootfstype=9p".into()); - cmdline.push(format!("rootflags={}", MOUNT_OPTS_9P_FS).into()); + // Tell kernel the rootfs is on a virtiofs and what "tag" it uses. + cmdline.push("rootfstype=virtiofs".into()); + cmdline.push(format!("root={ROOT_FS_MOUNT_TAG}").into()); // Mount rootfs readable/writable to make experience more smooth. // Lots of tools expect to be able to write logs or change global @@ -455,16 +455,6 @@ fn vmconfig_args(vm: &VMConfig) -> Vec { vm.memory.clone().into(), ]; - for mount in vm.mounts.values() { - let name = format!("mount{}", hash(&mount.host_path)); - args.append(&mut plan9_fs_args( - &mount.host_path, - &name, - &name, - !mount.writable, - )); - } - let mut extra_args = vm .extra_args .clone() @@ -650,6 +640,7 @@ impl Qemu { let command_sock = gen_sock("cmdout"); let (init, guest_init) = gen_init(&target.rootfs).context("Failed to generate init")?; + let mut virtiofsds = Vec::new(); let mut c = Command::new(format!("qemu-system-{}", target.arch)); c.args(QEMU_DEFAULT_ARGS) @@ -660,6 +651,7 @@ impl Qemu { .args(machine_args(&target.arch)) .args(machine_protocol_args(&qmp_sock)) .args(guest_agent_args(&qga_sock)) + .args(virtiofs_general_args(&target.vm)) .args(virtio_serial_args(&command_sock)); // Always ensure the rootfs is first. if let Some(image) = &target.image { @@ -668,11 +660,11 @@ impl Qemu { c.args(uefi_firmware_args(target.vm.bios.as_deref())); } } else if let Some(kernel) = &target.kernel { - c.args(plan9_fs_args( - target.rootfs.as_path(), + let virtiofsd = Virtiofsd::new(target.rootfs.as_path())?; + c.args(virtiofs_per_fs_args( + &virtiofsd, "root", - ROOTFS_9P_FS_MOUNT_TAG, - false, + ROOT_FS_MOUNT_TAG, )); c.args(kernel_args( kernel, @@ -680,16 +672,30 @@ impl Qemu { guest_init.as_path(), target.kernel_args.as_ref(), )); + virtiofsds.push(virtiofsd); } else { panic!("Config validation should've enforced XOR"); } + // Now add the shared mount and other extra mounts. - c.args(plan9_fs_args( - host_shared, + let virtiofsd = Virtiofsd::new(host_shared)?; + c.args(virtiofs_per_fs_args( + &virtiofsd, "shared", - SHARED_9P_FS_MOUNT_TAG, - false, + SHARED_FS_MOUNT_TAG, )); + virtiofsds.push(virtiofsd); + + for mount in target.vm.mounts.values() { + let name = format!("mount{}", hash(&mount.host_path)); + let virtiofsd = Virtiofsd::new(&mount.host_path)?; + c.args(virtiofs_per_fs_args( + &virtiofsd, + &name, + &name, + )); + virtiofsds.push(virtiofsd); + } c.args(vmconfig_args(&target.vm)); if log_enabled!(Level::Error) { @@ -706,6 +712,7 @@ impl Qemu { let mut qemu = Self { process: c, + virtiofsds, qga_sock, qmp_sock, command: target.command.to_string(), @@ -838,16 +845,18 @@ impl Qemu { // We can race with VM/qemu coming up. So retry a few times with growing backoff. let mut rc = 0; for i in 0..5 { - let mount_opts = if ro { - format!("{},ro", MOUNT_OPTS_9P_FS) - } else { - MOUNT_OPTS_9P_FS.into() - }; + let mut args = vec![ + "-t", "virtiofs", mount_tag, guest_path + ]; + if ro { + args.push("-oro") + } + rc = run_in_vm( qga, &output_fn, "mount", - &["-t", "9p", "-o", &mount_opts, mount_tag, guest_path], + &args, false, None, )?; @@ -1052,7 +1061,7 @@ impl Qemu { // Mount shared directory inside guest let _ = self.updates.send(Output::SetupStart); if let Err(e) = - self.mount_in_guest(qga, SHARED_9P_FS_MOUNT_PATH, SHARED_9P_FS_MOUNT_TAG, false) + self.mount_in_guest(qga, SHARED_FS_MOUNT_PATH, SHARED_FS_MOUNT_TAG, false) { return Err(e).context("Failed to mount shared directory in guest"); } @@ -1075,6 +1084,21 @@ impl Qemu { /// Errors and return status are reported through the `updates` channel passed into the /// constructor. pub fn run(mut self) { + let _ = self.updates.send(Output::InitializeStart); + for phase_fn in [Virtiofsd::launch, Virtiofsd::await_launched] { + for virtiofsd in self.virtiofsds.iter_mut() { + match phase_fn(virtiofsd) { + Ok(()) => (), + Err(e) => { + let _ = self.updates.send(Output::InitializeEnd(Err(e))); + return; + } + } + } + } + + let _ = self.updates.send(Output::InitializeEnd(Ok(()))); + // Start QEMU let (mut child, qga, mut qmp) = match self.boot_vm() { Ok((c, qga, qmp)) => (c, qga, qmp), diff --git a/src/ui.rs b/src/ui.rs index 63b8273..80a7f7d 100644 --- a/src/ui.rs +++ b/src/ui.rs @@ -177,6 +177,16 @@ impl Ui { }; match &msg { + Output::InitializeStart => { + stage = Stage::new(term.clone(), &heading("Initializing host environment", 2), Some(stage)); + stages += 1; + } + Output::InitializeEnd(r) => { + if let Err(e) = r { + error_out_stage(&mut stage, e); + errors += 1; + } + } Output::BootStart => { stage = Stage::new(term.clone(), &heading("Booting", 2), Some(stage)); stages += 1; diff --git a/src/virtiofsd.rs b/src/virtiofsd.rs new file mode 100644 index 0000000..1bfa472 --- /dev/null +++ b/src/virtiofsd.rs @@ -0,0 +1,141 @@ +use std::path::Path; +use std::path::PathBuf; +use std::process::Child; +use std::process::Command; +use std::process::Stdio; +use std::sync::LazyLock; +use std::time::Duration; +use std::time::Instant; + +use anyhow::bail; +use anyhow::Context as _; +use anyhow::Result; +use log::error; + +use crate::util::gen_sock; + +const VIRTIOFSD_NAME: &str = "virtiofsd"; +static VIRTIOFSD_PATH: LazyLock> = LazyLock::new(find_virtiofsd_exe); + +fn find_virtiofsd_exe() -> Option { + // The virtiofsd binary can be found at `/usr/libexec/virtiofsd` + // on Debian and Fedora, `/usr/lib/virtiofsd` on Arch. + let dirs = ["/usr/libexec/", "/usr/lib/"]; + for dir in dirs { + let path = Path::new(dir).join(VIRTIOFSD_NAME); + if path.exists() { + return Some(path); + } + } + + None +} + +enum State { + Command(Command), + Process(Child), +} + +pub(crate) struct Virtiofsd { + /// Our main state. + state: State, + /// The path to the Unix domain socket used for communication. + socket_path: PathBuf, +} + +impl Virtiofsd { + /// Create a `Virtiofsd` instance for sharing the given directory. + pub fn new(shared_dir: &Path) -> Result { + let virtiofsd_path = VIRTIOFSD_PATH + .as_ref() + .with_context(|| format!("`{VIRTIOFSD_NAME}` binary not found"))?; + let socket_path = gen_sock(VIRTIOFSD_NAME); + + let mut command = Command::new(virtiofsd_path); + let _ = command + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::piped()) + .arg("--socket-path") + .arg(&socket_path) + .arg("--shared-dir") + .arg(shared_dir) + .arg("-ocache=always") + .arg("--sandbox=none") + .arg("--announce-submounts") + .arg("--log-level=error"); + + let slf = Self { + state: State::Command(command), + socket_path, + }; + Ok(slf) + } + + pub fn launch(&mut self) -> Result<()> { + match &mut self.state { + State::Command(command) => { + let process = command + .spawn() + .with_context(|| format!("failed to spawn `{VIRTIOFSD_NAME}` instance"))?; + + self.state = State::Process(process) + } + State::Process(..) => { + // Already launched. No-op. + } + } + Ok(()) + } + + pub fn await_launched(&mut self) -> Result<()> { + if let State::Command(..) = self.state { + let () = self.launch()?; + } + + match self.state { + State::Command(..) => unreachable!(), + State::Process(..) => { + let now = Instant::now(); + let timeout = Duration::from_secs(5); + + while now.elapsed() < timeout { + if self.socket_path.exists() { + return Ok(()); + } + } + } + }; + + bail!( + "{VIRTIOFSD_NAME} socket `{}` did not appear in time", + self.socket_path.display() + ) + } + + #[inline] + pub fn socket_path(&self) -> &Path { + &self.socket_path + } +} + +impl Drop for Virtiofsd { + fn drop(&mut self) { + if let State::Process(process) = &mut self.state { + if let Err(err) = process.kill() { + error!("failed to kill `{VIRTIOFSD_NAME}` instance: {err}"); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Check that we can discover the path to `virtiofsd`. + #[test] + fn virtiofsd_discovery() { + let _path = VIRTIOFSD_PATH.as_ref().expect("failed to find virtiofsd"); + } +} diff --git a/tests/kernels/archlinux b/tests/kernels/archlinux index 1b51a39..cbadc22 100644 --- a/tests/kernels/archlinux +++ b/tests/kernels/archlinux @@ -10078,9 +10078,9 @@ CONFIG_QFMT_V2=m CONFIG_QUOTACTL=y CONFIG_AUTOFS4_FS=y CONFIG_AUTOFS_FS=y -CONFIG_FUSE_FS=m +CONFIG_FUSE_FS=y CONFIG_CUSE=m -CONFIG_VIRTIO_FS=m +CONFIG_VIRTIO_FS=y CONFIG_FUSE_DAX=y CONFIG_OVERLAY_FS=m CONFIG_OVERLAY_FS_REDIRECT_DIR=y diff --git a/tests/kernels/fedora38 b/tests/kernels/fedora38 index e247a50..603b510 100644 --- a/tests/kernels/fedora38 +++ b/tests/kernels/fedora38 @@ -9359,9 +9359,9 @@ CONFIG_QFMT_V2=y CONFIG_QUOTACTL=y CONFIG_AUTOFS4_FS=y CONFIG_AUTOFS_FS=y -CONFIG_FUSE_FS=m +CONFIG_FUSE_FS=y CONFIG_CUSE=m -CONFIG_VIRTIO_FS=m +CONFIG_VIRTIO_FS=y CONFIG_FUSE_DAX=y CONFIG_OVERLAY_FS=m # CONFIG_OVERLAY_FS_REDIRECT_DIR is not set diff --git a/tests/test.rs b/tests/test.rs index f5140c2..86741b4 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -113,7 +113,7 @@ fn test_vmtest_infra_error() { assert_eq!(failed, 69); } -// Expect we can run each target one by one, sucessfully +// Expect we can run each target one by one, successfully #[test] fn test_run_one() { let uefi_image = create_new_image(asset("image-uefi.raw-efi"));