diff --git a/benches/parse.rs b/benches/parse.rs index aa29db6..518e458 100644 --- a/benches/parse.rs +++ b/benches/parse.rs @@ -1,5 +1,6 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use std::io::Write; +use n2::parse::Loader; +use std::{io::Write, path::PathBuf, str::FromStr}; pub fn bench_canon(c: &mut Criterion) { // TODO switch to canon_path_fast @@ -21,16 +22,9 @@ pub fn bench_canon(c: &mut Criterion) { }); } -struct NoOpLoader {} -impl n2::parse::Loader for NoOpLoader { - type Path = (); - fn path(&mut self, _path: &mut str) -> Self::Path { - () - } -} - fn generate_build_ninja() -> Vec { let mut buf: Vec = Vec::new(); + write!(buf, "rule cc\n command = touch $out",).unwrap(); for i in 0..1000 { write!( buf, @@ -44,14 +38,13 @@ fn generate_build_ninja() -> Vec { } fn bench_parse_synthetic(c: &mut Criterion) { - let mut loader = NoOpLoader {}; let mut input = generate_build_ninja(); input.push(0); c.bench_function("parse synthetic build.ninja", |b| { b.iter(|| { let mut parser = n2::parse::Parser::new(&input); loop { - if parser.read(&mut loader).unwrap().is_none() { + if parser.read().unwrap().is_none() { break; } } @@ -60,13 +53,12 @@ fn bench_parse_synthetic(c: &mut Criterion) { } fn bench_parse_file(c: &mut Criterion, mut input: Vec) { - let mut loader = NoOpLoader {}; input.push(0); c.bench_function("parse benches/build.ninja", |b| { b.iter(|| { let mut parser = n2::parse::Parser::new(&input); loop { - if parser.read(&mut loader).unwrap().is_none() { + if parser.read().unwrap().is_none() { break; } } @@ -85,5 +77,18 @@ pub fn bench_parse(c: &mut Criterion) { }; } -criterion_group!(benches, bench_canon, bench_parse); +fn bench_load_synthetic(c: &mut Criterion) { + let mut input = generate_build_ninja(); + input.push(0); + c.bench_function("load synthetic build.ninja", |b| { + b.iter(|| { + let mut loader = n2::load::Loader::new(); + loader + .parse(PathBuf::from_str("build.ninja").unwrap(), &input) + .unwrap(); + }) + }); +} + +criterion_group!(benches, bench_canon, bench_parse, bench_load_synthetic); criterion_main!(benches); diff --git a/src/eval.rs b/src/eval.rs index c3f296d..c5f511b 100644 --- a/src/eval.rs +++ b/src/eval.rs @@ -2,17 +2,18 @@ //! `c++ $in -o $out`, and mechanisms for expanding those into plain strings. use crate::smallmap::SmallMap; +use std::borrow::Borrow; use std::{borrow::Cow, collections::HashMap}; /// An environment providing a mapping of variable name to variable value. -/// A given EvalString may be expanded with multiple environments as possible -/// context. +/// This represents one "frame" of evaluation context, a given EvalString may +/// need multiple environments in order to be fully expanded. pub trait Env { - fn get_var(&self, var: &str) -> Option>; + fn get_var(&self, var: &str) -> Option>>; } /// One token within an EvalString, either literal text or a variable reference. -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq)] pub enum EvalPart> { Literal(T), VarRef(T), @@ -22,29 +23,38 @@ pub enum EvalPart> { /// This is generic to support EvalString<&str>, which is used for immediately- /// expanded evals, like top-level bindings, and EvalString, which is /// used for delayed evals like in `rule` blocks. -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub struct EvalString>(Vec>); impl> EvalString { pub fn new(parts: Vec>) -> Self { EvalString(parts) } - pub fn evaluate(&self, envs: &[&dyn Env]) -> String { - let mut val = String::new(); + fn evaluate_inner(&self, result: &mut String, envs: &[&dyn Env]) { for part in &self.0 { match part { - EvalPart::Literal(s) => val.push_str(s.as_ref()), + EvalPart::Literal(s) => result.push_str(s.as_ref()), EvalPart::VarRef(v) => { - for env in envs { + for (i, env) in envs.iter().enumerate() { if let Some(v) = env.get_var(v.as_ref()) { - val.push_str(&v); + v.evaluate_inner(result, &envs[i + 1..]); break; } } } } } - val + } + + /// evalulate turns the EvalString into a regular String, looking up the + /// values of variable references in the provided Envs. It will look up + /// its variables in the earliest Env that has them, and then those lookups + /// will be recursively expanded starting from the env after the one that + /// had the first successful lookup. + pub fn evaluate(&self, envs: &[&dyn Env]) -> String { + let mut result = String::new(); + self.evaluate_inner(&mut result, envs); + result } } @@ -62,6 +72,34 @@ impl EvalString<&str> { } } +impl EvalString { + pub fn as_cow(&self) -> EvalString> { + EvalString( + self.0 + .iter() + .map(|part| match part { + EvalPart::Literal(s) => EvalPart::Literal(Cow::Borrowed(s.as_ref())), + EvalPart::VarRef(s) => EvalPart::VarRef(Cow::Borrowed(s.as_ref())), + }) + .collect(), + ) + } +} + +impl EvalString<&str> { + pub fn as_cow(&self) -> EvalString> { + EvalString( + self.0 + .iter() + .map(|part| match part { + EvalPart::Literal(s) => EvalPart::Literal(Cow::Borrowed(*s)), + EvalPart::VarRef(s) => EvalPart::VarRef(Cow::Borrowed(*s)), + }) + .collect(), + ) + } +} + /// A single scope's worth of variable definitions. #[derive(Debug, Default)] pub struct Vars<'text>(HashMap<&'text str, String>); @@ -70,36 +108,34 @@ impl<'text> Vars<'text> { pub fn insert(&mut self, key: &'text str, val: String) { self.0.insert(key, val); } - pub fn get(&self, key: &'text str) -> Option<&String> { + pub fn get(&self, key: &str) -> Option<&String> { self.0.get(key) } } impl<'a> Env for Vars<'a> { - fn get_var(&self, var: &str) -> Option> { - self.0.get(var).map(|str| Cow::Borrowed(str.as_str())) + fn get_var(&self, var: &str) -> Option>> { + Some(EvalString::new(vec![EvalPart::Literal( + std::borrow::Cow::Borrowed(self.get(var)?), + )])) + } +} + +impl + PartialEq> Env for SmallMap> { + fn get_var(&self, var: &str) -> Option>> { + Some(self.get(var)?.as_cow()) } } -// Impl for Loader.rules -impl Env for SmallMap> { - fn get_var(&self, var: &str) -> Option> { - // TODO(#83): this is wrong in that it doesn't include envs. - // This can occur when you have e.g. - // rule foo - // bar = $baz - // build ...: foo - // x = $bar - // When evaluating the value of `x`, we find `bar` in the rule but - // then need to pick the right env to evaluate $baz. But we also don't - // wanna generically always use all available envs because we don't - // wanna get into evaluation cycles. - self.get(var).map(|val| Cow::Owned(val.evaluate(&[]))) +impl + PartialEq> Env for SmallMap> { + fn get_var(&self, var: &str) -> Option>> { + Some(self.get(var)?.as_cow()) } } -// Impl for the variables attached to a build. impl Env for SmallMap<&str, String> { - fn get_var(&self, var: &str) -> Option> { - self.get(var).map(|val| Cow::Borrowed(val.as_str())) + fn get_var(&self, var: &str) -> Option>> { + Some(EvalString::new(vec![EvalPart::Literal( + std::borrow::Cow::Borrowed(self.get(var)?), + )])) } } diff --git a/src/lib.rs b/src/lib.rs index 50e258f..fea575a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,7 @@ mod depfile; mod eval; mod graph; mod hash; -mod load; +pub mod load; pub mod parse; mod process; #[cfg(unix)] diff --git a/src/load.rs b/src/load.rs index 51c4c6f..677137a 100644 --- a/src/load.rs +++ b/src/load.rs @@ -2,6 +2,7 @@ use crate::{ canon::{canon_path, canon_path_fast}, + eval::{EvalPart, EvalString}, graph::{FileId, RspFile}, parse::Statement, smallmap::SmallMap, @@ -30,12 +31,14 @@ impl<'a> BuildImplicitVars<'a> { } } impl<'a> eval::Env for BuildImplicitVars<'a> { - fn get_var(&self, var: &str) -> Option> { + fn get_var(&self, var: &str) -> Option>> { + let string_to_evalstring = + |s: String| Some(EvalString::new(vec![EvalPart::Literal(Cow::Owned(s))])); match var { - "in" => Some(Cow::Owned(self.file_list(self.build.explicit_ins(), ' '))), - "in_newline" => Some(Cow::Owned(self.file_list(self.build.explicit_ins(), '\n'))), - "out" => Some(Cow::Owned(self.file_list(self.build.explicit_outs(), ' '))), - "out_newline" => Some(Cow::Owned(self.file_list(self.build.explicit_outs(), '\n'))), + "in" => string_to_evalstring(self.file_list(self.build.explicit_ins(), ' ')), + "in_newline" => string_to_evalstring(self.file_list(self.build.explicit_ins(), '\n')), + "out" => string_to_evalstring(self.file_list(self.build.explicit_outs(), ' ')), + "out_newline" => string_to_evalstring(self.file_list(self.build.explicit_outs(), '\n')), _ => None, } } @@ -43,7 +46,7 @@ impl<'a> eval::Env for BuildImplicitVars<'a> { /// Internal state used while loading. #[derive(Default)] -struct Loader { +pub struct Loader { graph: graph::Graph, default: Vec, /// rule name -> list of (key, val) @@ -64,7 +67,7 @@ impl parse::Loader for Loader { } impl Loader { - fn new() -> Self { + pub fn new() -> Self { let mut loader = Loader::default(); loader.rules.insert("phony".to_owned(), SmallMap::default()); @@ -72,21 +75,38 @@ impl Loader { loader } + fn evaluate_path(&mut self, path: EvalString<&str>, envs: &[&dyn eval::Env]) -> FileId { + use parse::Loader; + let mut evaluated = path.evaluate(envs); + self.path(&mut evaluated) + } + + fn evaluate_paths( + &mut self, + paths: Vec>, + envs: &[&dyn eval::Env], + ) -> Vec { + paths + .into_iter() + .map(|path| self.evaluate_path(path, envs)) + .collect() + } + fn add_build( &mut self, filename: std::rc::Rc, env: &eval::Vars, - b: parse::Build, + b: parse::Build, ) -> anyhow::Result<()> { let ins = graph::BuildIns { - ids: b.ins, + ids: self.evaluate_paths(b.ins, &[&b.vars, env]), explicit: b.explicit_ins, implicit: b.implicit_ins, order_only: b.order_only_ins, // validation is implied by the other counts }; let outs = graph::BuildOuts { - ids: b.outs, + ids: self.evaluate_paths(b.outs, &[&b.vars, env]), explicit: b.explicit_outs, }; let mut build = graph::Build::new( @@ -108,21 +128,14 @@ impl Loader { build: &build, }; - // Expand all build-scoped variable values, as they may be referred to in rules. - let mut build_vars = SmallMap::default(); - for &(name, ref val) in b.vars.iter() { - let val = val.evaluate(&[&implicit_vars, &build_vars, env]); - build_vars.insert(name, val); - } - - let envs: [&dyn eval::Env; 4] = [&implicit_vars, &build_vars, rule, env]; + // temp variable in order to not move all of b into the closure + let build_vars = &b.vars; let lookup = |key: &str| -> Option { // Look up `key = ...` binding in build and rule block. - let val = match build_vars.get(key) { - Some(val) => val.clone(), - None => rule.get(key)?.evaluate(&envs), - }; - Some(val) + Some(match rule.get(key) { + Some(val) => val.evaluate(&[&implicit_vars, build_vars, env]), + None => build_vars.get(key)?.evaluate(&[env]), + }) }; let cmdline = lookup("command"); @@ -167,29 +180,47 @@ impl Loader { self.parse(path, &bytes) } - fn parse(&mut self, path: PathBuf, bytes: &[u8]) -> anyhow::Result<()> { + fn evaluate_and_read_file( + &mut self, + file: EvalString<&str>, + envs: &[&dyn eval::Env], + ) -> anyhow::Result<()> { + let evaluated = self.evaluate_path(file, envs); + self.read_file(evaluated) + } + + pub fn parse(&mut self, path: PathBuf, bytes: &[u8]) -> anyhow::Result<()> { let filename = std::rc::Rc::new(path); let mut parser = parse::Parser::new(&bytes); + loop { let stmt = match parser - .read(self) + .read() .map_err(|err| anyhow!(parser.format_parse_error(&filename, err)))? { None => break, Some(s) => s, }; match stmt { - Statement::Include(id) => trace::scope("include", || self.read_file(id))?, + Statement::Include(id) => trace::scope("include", || { + self.evaluate_and_read_file(id, &[&parser.vars]) + })?, // TODO: implement scoping for subninja - Statement::Subninja(id) => trace::scope("subninja", || self.read_file(id))?, + Statement::Subninja(id) => trace::scope("subninja", || { + self.evaluate_and_read_file(id, &[&parser.vars]) + })?, Statement::Default(defaults) => { - self.default.extend(defaults); + let evaluated = self.evaluate_paths(defaults, &[&parser.vars]); + self.default.extend(evaluated); } Statement::Rule(rule) => { let mut vars: SmallMap> = SmallMap::default(); for (name, val) in rule.vars.into_iter() { - vars.insert(name.to_owned(), val); + // TODO: We should not need to call .into_owned() here + // if we keep the contents of all included files in + // memory. + vars.insert(name.to_owned(), val.into_owned()); } self.rules.insert(rule.name.to_owned(), vars); } diff --git a/src/parse.rs b/src/parse.rs index 35fb93e..a8ee60f 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -14,19 +14,19 @@ use std::path::Path; /// A list of variable bindings, as expressed with syntax like: /// key = $val -pub type VarList<'text> = SmallMap<&'text str, EvalString>; +pub type VarList<'text> = SmallMap<&'text str, EvalString<&'text str>>; pub struct Rule<'text> { pub name: &'text str, pub vars: VarList<'text>, } -pub struct Build<'text, Path> { +pub struct Build<'text> { pub rule: &'text str, pub line: usize, - pub outs: Vec, + pub outs: Vec>, pub explicit_outs: usize, - pub ins: Vec, + pub ins: Vec>, pub explicit_ins: usize, pub implicit_ins: usize, pub order_only_ins: usize, @@ -40,21 +40,21 @@ pub struct Pool<'text> { pub depth: usize, } -pub enum Statement<'text, Path> { +pub enum Statement<'text> { Rule(Rule<'text>), - Build(Build<'text, Path>), - Default(Vec), - Include(Path), - Subninja(Path), + Build(Build<'text>), + Default(Vec>), + Include(EvalString<&'text str>), + Subninja(EvalString<&'text str>), Pool(Pool<'text>), } pub struct Parser<'text> { scanner: Scanner<'text>, pub vars: Vars<'text>, - /// Reading paths is very hot when parsing, so we always read into this buffer - /// and then immediately pass in to Loader::path() to canonicalize it in-place. - path_buf: Vec, + /// Reading EvalStrings is very hot when parsing, so we always read into + /// this buffer and then clone it afterwards. + eval_buf: Vec>, } /// Loader maps path strings (as found in build.ninja files) into an arbitrary @@ -74,7 +74,7 @@ impl<'text> Parser<'text> { Parser { scanner: Scanner::new(buf), vars: Vars::default(), - path_buf: Vec::with_capacity(64), + eval_buf: Vec::with_capacity(16), } } @@ -82,10 +82,7 @@ impl<'text> Parser<'text> { self.scanner.format_parse_error(filename, err) } - pub fn read( - &mut self, - loader: &mut L, - ) -> ParseResult>> { + pub fn read(&mut self) -> ParseResult>> { loop { match self.scanner.peek() { '\0' => return Ok(None), @@ -97,26 +94,20 @@ impl<'text> Parser<'text> { self.skip_spaces(); match ident { "rule" => return Ok(Some(Statement::Rule(self.read_rule()?))), - "build" => return Ok(Some(Statement::Build(self.read_build(loader)?))), - "default" => { - return Ok(Some(Statement::Default(self.read_default(loader)?))) - } + "build" => return Ok(Some(Statement::Build(self.read_build()?))), + "default" => return Ok(Some(Statement::Default(self.read_default()?))), "include" => { - let id = match self.read_path(loader)? { - None => return self.scanner.parse_error("expected path"), - Some(p) => p, - }; - return Ok(Some(Statement::Include(id))); + return Ok(Some(Statement::Include(self.read_eval(false)?))); } "subninja" => { - let id = match self.read_path(loader)? { - None => return self.scanner.parse_error("expected path"), - Some(p) => p, - }; - return Ok(Some(Statement::Subninja(id))); + return Ok(Some(Statement::Subninja(self.read_eval(false)?))); } "pool" => return Ok(Some(Statement::Pool(self.read_pool()?))), ident => { + // TODO: The evaluation of global variables should + // be moved out of the parser, so that we can run + // multiple parsers in parallel and then evaluate + // all the variables in series at the end. let val = self.read_vardef()?.evaluate(&[&self.vars]); self.vars.insert(ident, val); } @@ -131,18 +122,34 @@ impl<'text> Parser<'text> { self.skip_spaces(); self.scanner.expect('=')?; self.skip_spaces(); - self.read_eval() + // read_eval will error out if there's nothing to read + if self.scanner.peek_newline() { + self.scanner.skip('\r'); + self.scanner.expect('\n')?; + return Ok(EvalString::new(Vec::new())); + } + let result = self.read_eval(false); + self.scanner.skip('\r'); + self.scanner.expect('\n')?; + result } /// Read a collection of ` foo = bar` variables, with leading indent. - fn read_scoped_vars(&mut self) -> ParseResult> { + fn read_scoped_vars( + &mut self, + variable_name_validator: fn(var: &str) -> bool, + ) -> ParseResult> { let mut vars = VarList::default(); while self.scanner.peek() == ' ' { self.scanner.skip_spaces(); let name = self.read_ident()?; + if !variable_name_validator(name) { + self.scanner + .parse_error(format!("unexpected variable {:?}", name))?; + } self.skip_spaces(); let val = self.read_vardef()?; - vars.insert(name, val.into_owned()); + vars.insert(name, val); } Ok(vars) } @@ -151,7 +158,22 @@ impl<'text> Parser<'text> { let name = self.read_ident()?; self.scanner.skip('\r'); self.scanner.expect('\n')?; - let vars = self.read_scoped_vars()?; + let vars = self.read_scoped_vars(|var| { + matches!( + var, + "command" + | "depfile" + | "dyndep" + | "description" + | "deps" + | "generator" + | "pool" + | "restat" + | "rspfile" + | "rspfile_content" + | "msvc_deps_prefix" + ) + })?; Ok(Rule { name, vars }) } @@ -159,51 +181,42 @@ impl<'text> Parser<'text> { let name = self.read_ident()?; self.scanner.skip('\r'); self.scanner.expect('\n')?; - let vars = self.read_scoped_vars()?; + let vars = self.read_scoped_vars(|var| matches!(var, "depth"))?; let mut depth = 0; - for (key, val) in vars.into_iter() { - match key { - "depth" => { - let val = val.evaluate(&[]); - depth = match val.parse::() { - Ok(d) => d, - Err(err) => { - return self.scanner.parse_error(format!("pool depth: {}", err)) - } - } - } - _ => { - return self - .scanner - .parse_error(format!("unexpected pool attribute {:?}", key)); - } + if let Some((_, val)) = vars.into_iter().next() { + let val = val.evaluate(&[]); + depth = match val.parse::() { + Ok(d) => d, + Err(err) => return self.scanner.parse_error(format!("pool depth: {}", err)), } } Ok(Pool { name, depth }) } - fn read_paths_to( + fn read_unevaluated_paths_to( &mut self, - loader: &mut L, - v: &mut Vec, + v: &mut Vec>, ) -> ParseResult<()> { self.skip_spaces(); - while let Some(path) = self.read_path(loader)? { - v.push(path); + while self.scanner.peek() != ':' + && self.scanner.peek() != '|' + && !self.scanner.peek_newline() + { + v.push(self.read_eval(true)?); self.skip_spaces(); } Ok(()) } - fn read_build(&mut self, loader: &mut L) -> ParseResult> { + fn read_build(&mut self) -> ParseResult> { let line = self.scanner.line; let mut outs = Vec::new(); - self.read_paths_to(loader, &mut outs)?; + self.read_unevaluated_paths_to(&mut outs)?; let explicit_outs = outs.len(); if self.scanner.peek() == '|' { self.scanner.next(); - self.read_paths_to(loader, &mut outs)?; + self.read_unevaluated_paths_to(&mut outs)?; } self.scanner.expect(':')?; @@ -211,7 +224,7 @@ impl<'text> Parser<'text> { let rule = self.read_ident()?; let mut ins = Vec::new(); - self.read_paths_to(loader, &mut ins)?; + self.read_unevaluated_paths_to(&mut ins)?; let explicit_ins = ins.len(); if self.scanner.peek() == '|' { @@ -220,7 +233,7 @@ impl<'text> Parser<'text> { if peek == '|' || peek == '@' { self.scanner.back(); } else { - self.read_paths_to(loader, &mut ins)?; + self.read_unevaluated_paths_to(&mut ins)?; } } let implicit_ins = ins.len() - explicit_ins; @@ -231,7 +244,7 @@ impl<'text> Parser<'text> { self.scanner.back(); } else { self.scanner.expect('|')?; - self.read_paths_to(loader, &mut ins)?; + self.read_unevaluated_paths_to(&mut ins)?; } } let order_only_ins = ins.len() - implicit_ins - explicit_ins; @@ -239,13 +252,13 @@ impl<'text> Parser<'text> { if self.scanner.peek() == '|' { self.scanner.next(); self.scanner.expect('@')?; - self.read_paths_to(loader, &mut ins)?; + self.read_unevaluated_paths_to(&mut ins)?; } let validation_ins = ins.len() - order_only_ins - implicit_ins - explicit_ins; self.scanner.skip('\r'); self.scanner.expect('\n')?; - let vars = self.read_scoped_vars()?; + let vars = self.read_scoped_vars(|_| true)?; Ok(Build { rule, line, @@ -260,9 +273,9 @@ impl<'text> Parser<'text> { }) } - fn read_default(&mut self, loader: &mut L) -> ParseResult> { + fn read_default(&mut self) -> ParseResult>> { let mut defaults = Vec::new(); - self.read_paths_to(loader, &mut defaults)?; + self.read_unevaluated_paths_to(&mut defaults)?; if defaults.is_empty() { return self.scanner.parse_error("expected path"); } @@ -299,79 +312,77 @@ impl<'text> Parser<'text> { Ok(self.scanner.slice(start, end)) } - fn read_eval(&mut self) -> ParseResult> { - // Guaranteed at least one part. - let mut parts = Vec::with_capacity(1); + /// Reads an EvalString. Stops at either a newline, or ' ', ':', '|' if + /// stop_at_path_separators is set, without consuming the character that + /// caused it to stop. + fn read_eval(&mut self, stop_at_path_separators: bool) -> ParseResult> { + self.eval_buf.clear(); let mut ofs = self.scanner.ofs; - let end = loop { - match self.scanner.read() { - '\0' => return self.scanner.parse_error("unexpected EOF"), - '\n' => break self.scanner.ofs - 1, - '\r' if self.scanner.peek() == '\n' => { - self.scanner.next(); - break self.scanner.ofs - 2; - } - '$' => { - let end = self.scanner.ofs - 1; - if end > ofs { - parts.push(EvalPart::Literal(self.scanner.slice(ofs, end))); + // This match block is copied twice, with the only difference being the check for + // spaces, colons, and pipes in the stop_at_path_separators version. We could remove the + // duplication by adding a match branch like `' ' | ':' | '|' if stop_at_path_separators =>` + // or even moving the `if stop_at_path_separators` inside of the match body, but both of + // those options are ~10% slower on a benchmark test of running the loader on llvm-cmake + // ninja files. + let end = if stop_at_path_separators { + loop { + match self.scanner.read() { + '\0' => return self.scanner.parse_error("unexpected EOF"), + ' ' | ':' | '|' | '\n' => { + self.scanner.back(); + break self.scanner.ofs; } - parts.push(self.read_escape()?); - ofs = self.scanner.ofs; + '\r' if self.scanner.peek() == '\n' => { + self.scanner.back(); + break self.scanner.ofs; + } + '$' => { + let end = self.scanner.ofs - 1; + if end > ofs { + self.eval_buf + .push(EvalPart::Literal(self.scanner.slice(ofs, end))); + } + let escape = self.read_escape()?; + self.eval_buf.push(escape); + ofs = self.scanner.ofs; + } + _ => {} } - _ => {} } - }; - if end > ofs { - parts.push(EvalPart::Literal(self.scanner.slice(ofs, end))); - } - Ok(EvalString::new(parts)) - } - - fn read_path(&mut self, loader: &mut L) -> ParseResult> { - self.path_buf.clear(); - loop { - let c = self.scanner.read(); - match c { - '\0' => { - self.scanner.back(); - return self.scanner.parse_error("unexpected EOF"); - } - '$' => { - let part = self.read_escape()?; - match part { - EvalPart::Literal(l) => self.path_buf.extend_from_slice(l.as_bytes()), - EvalPart::VarRef(v) => { - if let Some(v) = self.vars.get(v) { - self.path_buf.extend_from_slice(v.as_bytes()); - } + } else { + loop { + match self.scanner.read() { + '\0' => return self.scanner.parse_error("unexpected EOF"), + '\n' => { + self.scanner.back(); + break self.scanner.ofs; + } + '\r' if self.scanner.peek() == '\n' => { + self.scanner.back(); + break self.scanner.ofs; + } + '$' => { + let end = self.scanner.ofs - 1; + if end > ofs { + self.eval_buf + .push(EvalPart::Literal(self.scanner.slice(ofs, end))); } + let escape = self.read_escape()?; + self.eval_buf.push(escape); + ofs = self.scanner.ofs; } - } - ':' | '|' | ' ' | '\n' | '\r' => { - // Basically any character is allowed in paths, but we want to parse e.g. - // build foo: bar | baz - // such that the colon is not part of the 'foo' path and such that '|' is - // not read as a path. - // Those characters can be embedded by escaping, e.g. "$:". - self.scanner.back(); - break; - } - c => { - self.path_buf.push(c as u8); + _ => {} } } + }; + if end > ofs { + self.eval_buf + .push(EvalPart::Literal(self.scanner.slice(ofs, end))); } - if self.path_buf.is_empty() { - return Ok(None); + if self.eval_buf.is_empty() { + return self.scanner.parse_error(format!("Expected a string")); } - // Performance: this is some of the hottest code in n2 so we cut some corners. - // Safety: see discussion of unicode safety in doc/development.md. - // I looked into switching this to BStr but it would require changing - // a lot of other code to BStr too. - let path_str = unsafe { std::str::from_utf8_unchecked_mut(&mut self.path_buf) }; - let path = loader.path(path_str); - Ok(Some(path)) + Ok(EvalString::new(self.eval_buf.clone())) } /// Read a variable name as found after a '$' in an eval. @@ -441,16 +452,6 @@ impl<'text> Parser<'text> { } } -#[cfg(test)] -struct StringLoader {} -#[cfg(test)] -impl Loader for StringLoader { - type Path = String; - fn path(&mut self, path: &mut str) -> Self::Path { - path.to_string() - } -} - #[cfg(test)] mod tests { use super::*; @@ -474,11 +475,18 @@ mod tests { test_for_line_endings(&["var = 3", "default a b$var c", ""], |test_case| { let mut buf = test_case_buffer(test_case); let mut parser = Parser::new(&mut buf); - let default = match parser.read(&mut StringLoader {}).unwrap().unwrap() { + let default = match parser.read().unwrap().unwrap() { Statement::Default(d) => d, _ => panic!("expected default"), }; - assert_eq!(default, vec!["a", "b3", "c"]); + assert_eq!( + default, + vec![ + EvalString::new(vec![EvalPart::Literal("a")]), + EvalString::new(vec![EvalPart::Literal("b"), EvalPart::VarRef("var")]), + EvalString::new(vec![EvalPart::Literal("c")]), + ] + ); }); } @@ -486,7 +494,7 @@ mod tests { fn parse_dot_in_eval() { let mut buf = test_case_buffer("x = $y.z\n"); let mut parser = Parser::new(&mut buf); - parser.read(&mut StringLoader {}).unwrap(); + parser.read().unwrap(); let x = parser.vars.get("x").unwrap(); assert_eq!(x, ".z"); } @@ -495,7 +503,7 @@ mod tests { fn parse_dot_in_rule() { let mut buf = test_case_buffer("rule x.y\n command = x\n"); let mut parser = Parser::new(&mut buf); - let stmt = parser.read(&mut StringLoader {}).unwrap().unwrap(); + let stmt = parser.read().unwrap().unwrap(); assert!(matches!( stmt, Statement::Rule(Rule { @@ -509,7 +517,7 @@ mod tests { fn parse_trailing_newline() { let mut buf = test_case_buffer("build$\n foo$\n : $\n touch $\n\n"); let mut parser = Parser::new(&mut buf); - let stmt = parser.read(&mut StringLoader {}).unwrap().unwrap(); + let stmt = parser.read().unwrap().unwrap(); assert!(matches!( stmt, Statement::Build(Build { rule: "touch", .. }) diff --git a/src/scanner.rs b/src/scanner.rs index 435984b..af06d2d 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -33,6 +33,16 @@ impl<'a> Scanner<'a> { pub fn peek(&self) -> char { unsafe { *self.buf.get_unchecked(self.ofs) as char } } + pub fn peek_newline(&self) -> bool { + if self.peek() == '\n' { + return true; + } + if self.ofs >= self.buf.len() - 1 { + return false; + } + let peek2 = unsafe { *self.buf.get_unchecked(self.ofs + 1) as char }; + self.peek() == '\r' && peek2 == '\n' + } pub fn next(&mut self) { if self.peek() == '\n' { self.line += 1; diff --git a/tests/e2e/basic.rs b/tests/e2e/basic.rs index a7c375b..fca23b8 100644 --- a/tests/e2e/basic.rs +++ b/tests/e2e/basic.rs @@ -330,3 +330,124 @@ fn builddir() -> anyhow::Result<()> { space.read("foo/.n2_db")?; Ok(()) } + +#[test] +fn bad_rule_variable() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule my_rule + command = touch $out + my_var = foo + +build out: my_rule +", + )?; + + let out = space.run(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "unexpected variable \"my_var\""); + Ok(()) +} + +#[cfg(unix)] +#[test] +fn deps_evaluate_build_bindings() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule touch + command = touch $out +rule copy + command = cp $in $out +build foo: copy ${my_dep} + my_dep = bar +build bar: touch +", + )?; + space.run_expect(&mut n2_command(vec!["foo"]))?; + space.read("foo")?; + Ok(()) +} + +#[cfg(unix)] +#[test] +fn looks_up_values_from_build() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule copy_rspfile + command = cp $out.rsp $out + rspfile = $out.rsp + +build foo: copy_rspfile + rspfile_content = Hello, world! +", + )?; + space.run_expect(&mut n2_command(vec!["foo"]))?; + assert_eq!(space.read("foo")?, b"Hello, world!"); + Ok(()) +} + +#[cfg(unix)] +#[test] +fn build_bindings_arent_recursive() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule write_file + command = echo $my_var > $out + +build foo: write_file + my_var = Hello,$my_var_2 world! + my_var_2 = my_var_2_value +", + )?; + space.run_expect(&mut n2_command(vec!["foo"]))?; + assert_eq!(space.read("foo")?, b"Hello, world!\n"); + Ok(()) +} + +#[cfg(unix)] +#[test] +fn empty_variable_binding() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +empty_var = + +rule write_file + command = echo $my_var > $out + +build foo: write_file + my_var = Hello,$empty_var world! +", + )?; + space.run_expect(&mut n2_command(vec!["foo"]))?; + assert_eq!(space.read("foo")?, b"Hello, world!\n"); + Ok(()) +} + +#[cfg(unix)] +#[test] +fn empty_build_variable() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule write_file + command = echo $my_var > $out + +build foo: write_file + empty = + my_var = Hello, world! +", + )?; + space.run_expect(&mut n2_command(vec!["foo"]))?; + assert_eq!(space.read("foo")?, b"Hello, world!\n"); + Ok(()) +}