From 38899de2f32a5c8bb156f5180c6ba3156552124f Mon Sep 17 00:00:00 2001 From: Evan Martin Date: Fri, 19 Jan 2024 00:11:47 -0800 Subject: [PATCH] avoid clown shoes allocation of input files Scanner expects its input to be nul terminated, which I implemented as buf = std::fs::read(file) buf.push(0) However, std::fs::read carefully sizes its buffer to be exactly the file's size, which means the push() must grow the buffer. This means for e.g. a 40mb input file, we'd read it into a 40mb buffer, then grow the buffer, copying it into an 80mb buffer, just to add one byte! Fix this by using a buffer of the appropriate size. I attempted to measure the perf impact here and it wasn't measureable. Possibly this buffer growth really is that fast? It seems it will at least have memory impact at least. --- benches/parse.rs | 7 +++---- src/lib.rs | 2 +- src/load.rs | 4 ++-- src/scanner.rs | 20 +++++++++++++++++++- src/task.rs | 5 ++--- 5 files changed, 27 insertions(+), 11 deletions(-) diff --git a/benches/parse.rs b/benches/parse.rs index 518e458..71abf60 100644 --- a/benches/parse.rs +++ b/benches/parse.rs @@ -52,8 +52,7 @@ fn bench_parse_synthetic(c: &mut Criterion) { }); } -fn bench_parse_file(c: &mut Criterion, mut input: Vec) { - input.push(0); +fn bench_parse_file(c: &mut Criterion, input: &[u8]) { c.bench_function("parse benches/build.ninja", |b| { b.iter(|| { let mut parser = n2::parse::Parser::new(&input); @@ -67,8 +66,8 @@ fn bench_parse_file(c: &mut Criterion, mut input: Vec) { } pub fn bench_parse(c: &mut Criterion) { - match std::fs::read("benches/build.ninja") { - Ok(input) => bench_parse_file(c, input), + match n2::scanner::read_file_with_nul("benches/build.ninja".as_ref()) { + Ok(input) => bench_parse_file(c, &input), Err(err) => { eprintln!("failed to read benches/build.ninja: {}", err); eprintln!("using synthetic build.ninja"); diff --git a/src/lib.rs b/src/lib.rs index fea575a..c1e8cb0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,7 @@ mod process_posix; mod process_win; mod progress; pub mod run; -mod scanner; +pub mod scanner; mod signal; mod smallmap; mod task; diff --git a/src/load.rs b/src/load.rs index 677137a..8297b4b 100644 --- a/src/load.rs +++ b/src/load.rs @@ -5,6 +5,7 @@ use crate::{ eval::{EvalPart, EvalString}, graph::{FileId, RspFile}, parse::Statement, + scanner, smallmap::SmallMap, {db, eval, graph, parse, trace}, }; @@ -172,11 +173,10 @@ impl Loader { fn read_file(&mut self, id: FileId) -> anyhow::Result<()> { let path = self.graph.file(id).path().to_path_buf(); - let mut bytes = match trace::scope("fs::read", || std::fs::read(&path)) { + let bytes = match trace::scope("read file", || scanner::read_file_with_nul(&path)) { Ok(b) => b, Err(e) => bail!("read {}: {}", path.display(), e), }; - bytes.push(0); self.parse(path, &bytes) } diff --git a/src/scanner.rs b/src/scanner.rs index af06d2d..091791d 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,6 +1,6 @@ //! Scans an input string (source file) character by character. -use std::path::Path; +use std::{io::Read, path::Path}; #[derive(Debug)] pub struct ParseError { @@ -132,3 +132,21 @@ impl<'a> Scanner<'a> { panic!("invalid offset when formatting error") } } + +/// Scanner wants its input buffer to end in a trailing nul. +/// This function is like std::fs::read() but appends a nul, efficiently. +pub fn read_file_with_nul(path: &Path) -> std::io::Result> { + // Using std::fs::read() to read the file and then pushing a nul on the end + // causes us to allocate a buffer the size of the file, then grow it to push + // the nul, copying the entire file(!). So instead create a buffer of the + // right size up front. + let mut file = std::fs::File::open(path)?; + let size = file.metadata()?.len() as usize; + let mut bytes = Vec::with_capacity(size + 1); + unsafe { + bytes.set_len(size); + } + file.read_exact(&mut bytes[..size])?; + bytes.push(0); + Ok(bytes) +} diff --git a/src/task.rs b/src/task.rs index ec4dc8d..693ab52 100644 --- a/src/task.rs +++ b/src/task.rs @@ -12,7 +12,7 @@ use crate::{ depfile, graph::{Build, BuildId, RspFile}, process, - scanner::Scanner, + scanner::{self, Scanner}, }; use anyhow::{anyhow, bail}; use std::path::{Path, PathBuf}; @@ -38,7 +38,7 @@ pub struct TaskResult { /// Reads dependencies from a .d file path. fn read_depfile(path: &Path) -> anyhow::Result> { - let mut bytes = match std::fs::read(path) { + let bytes = match scanner::read_file_with_nul(path) { Ok(b) => b, // See discussion of missing depfiles in #80. // TODO(#99): warn or error in this circumstance? @@ -46,7 +46,6 @@ fn read_depfile(path: &Path) -> anyhow::Result> { Err(e) => bail!("read {}: {}", path.display(), e), }; - bytes.push(0); let mut scanner = Scanner::new(&bytes); let parsed_deps = depfile::parse(&mut scanner) .map_err(|err| anyhow!(scanner.format_parse_error(path, err)))?;