From c374a11d01744961d650e697e7362325e9e68dc1 Mon Sep 17 00:00:00 2001 From: jjcnn <38888011+jjcnn@users.noreply.github.com> Date: Thu, 24 Oct 2024 14:56:37 +0200 Subject: [PATCH] Simplify the return path analysis (#6541) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Description The return path analysis ensures that if a method must return a value then the spine of the method returns a value. The analysis traverses a linearized version of the CFG (one which ignores branching expressions such as `if-then-else` and loops, and simply treats them as a single node) of each method from the (unique) entry point to the (unique) exit point. If there is ever a node in the graph that does not have outgoing edges, then an error is thrown. When constructing such a linearized CFG we have so far treated local `impl`s as branches, meaning that the spine was in fact not linear: ``` fn main() -> u64 { // This point has two outgoing edges // One goes to here impl core::ops::Eq for X { fn eq(self, other: Self) -> bool { asm(r1: self, r2: other, r3) { eq r3 r2 r1; r3: bool } } } // The other goes to here if X::Y(true) == X::Y(true) { a } else { a } } ``` This has been the case even though the edge into a local `impl` does not in fact represent legal control flow. This incorrect construction has made the traversal of the spine-CFG very convoluted and difficult to follow. This PR fixes the incorrect construction, and simplifies the traversal of the spine so that it is clear what is going on. I have also added an additional test to show that methods inside local `impl`s are in fact analyzed separately, even though there is no longer an edge going into the `impl` from the surrounding function. ## Checklist - [x] I have linked to any relevant issues. - [x] I have commented my code, particularly in hard-to-understand areas. - [x] I have updated the documentation where relevant (API docs, the reference, and the Sway book). - [ ] If my change requires substantial documentation changes, I have [requested support from the DevRel team](https://github.com/FuelLabs/devrel-requests/issues/new/choose) - [x] I have added tests that prove my fix is effective or that my feature works. - [x] I have added (or requested a maintainer to add) the necessary `Breaking*` or `New Feature` labels where relevant. - [x] I have done my best to ensure that my PR adheres to [the Fuel Labs Code Review Standards](https://github.com/FuelLabs/rfcs/blob/master/text/code-standards/external-contributors.md). - [x] I have requested a review from the relevant team or maintainers. --------- Co-authored-by: IGI-111 Co-authored-by: Joshua Batty Co-authored-by: João Matos --- .../analyze_return_paths.rs | 133 +++++++++--------- .../return_path_analysis/src/main.sw | 44 +++++- .../return_path_analysis/test.toml | 5 +- 3 files changed, 110 insertions(+), 72 deletions(-) diff --git a/sway-core/src/control_flow_analysis/analyze_return_paths.rs b/sway-core/src/control_flow_analysis/analyze_return_paths.rs index 863e5ccfc69..dc32308875f 100644 --- a/sway-core/src/control_flow_analysis/analyze_return_paths.rs +++ b/sway-core/src/control_flow_analysis/analyze_return_paths.rs @@ -23,11 +23,11 @@ impl<'cfg> ControlFlowGraph<'cfg> { let mut graph = ControlFlowGraph::new(engines); // do a depth first traversal and cover individual inner ast nodes - let mut leaves = vec![]; + let mut leaf_opt = None; for ast_entrypoint in module_nodes { - match connect_node(engines, ast_entrypoint, &mut graph, &leaves) { - Ok(NodeConnection::NextStep(nodes)) => { - leaves = nodes; + match connect_node(engines, ast_entrypoint, &mut graph, leaf_opt) { + Ok(NodeConnection::NextStep(node_opt)) => { + leaf_opt = node_opt; } Ok(_) => {} Err(mut e) => { @@ -72,6 +72,12 @@ impl<'cfg> ControlFlowGraph<'cfg> { errors } + /// Traverses the spine of a function to ensure that it does return if a return value is + /// expected. The spine of the function does not include branches such as if-then-elses and + /// loops. Those branches are ignored, and a branching expression is represented as a single + /// node in the graph. The analysis continues once the branches join again. This means that the + /// spine is linear, so every node has at most one outgoing edge. The graph is assumed to have + /// been constructed this way. fn ensure_all_paths_reach_exit( &self, engines: &Engines, @@ -80,32 +86,28 @@ impl<'cfg> ControlFlowGraph<'cfg> { function_name: &IdentUnique, return_ty: &TypeInfo, ) -> Vec { - let mut rovers = vec![entry_point]; - let mut visited = vec![]; + let mut rover = entry_point; let mut errors = vec![]; - while !rovers.is_empty() && rovers[0] != exit_point { - rovers.retain(|idx| *idx != exit_point); - let mut next_rovers = vec![]; - let mut last_discovered_span; - for rover in rovers { - visited.push(rover); - last_discovered_span = match &self.graph[rover] { - ControlFlowGraphNode::ProgramNode { node, .. } => Some(node.span.clone()), - ControlFlowGraphNode::MethodDeclaration { span, .. } => Some(span.clone()), - _ => None, - }; - let mut neighbors = self - .graph - .neighbors_directed(rover, petgraph::Direction::Outgoing) - .collect::>(); - if neighbors.is_empty() && !return_ty.is_unit() { - let span = match last_discovered_span { - Some(ref o) => o.clone(), - None => { + while rover != exit_point { + let neighbors = self + .graph + .neighbors_directed(rover, petgraph::Direction::Outgoing) + .collect::>(); + + // The graph is supposed to be a single path, so at most one outgoing neighbor is allowed. + assert!(neighbors.len() <= 1); + + if neighbors.is_empty() { + if !return_ty.is_unit() { + // A return is expected, but none is found. Report an error. + let span = match &self.graph[rover] { + ControlFlowGraphNode::ProgramNode { node, .. } => node.span.clone(), + ControlFlowGraphNode::MethodDeclaration { span, .. } => span.clone(), + _ => { errors.push(CompileError::Internal( "Attempted to construct return path error \ - but no source span was found.", + but no source span was found.", Span::dummy(), )); return errors; @@ -113,17 +115,17 @@ impl<'cfg> ControlFlowGraph<'cfg> { }; let function_name: Ident = function_name.into(); errors.push(CompileError::PathDoesNotReturn { - // TODO: unwrap_to_node is a shortcut. In reality, the graph type should be - // different. To save some code duplication, span, function_name: function_name.clone(), ty: engines.help_out(return_ty).to_string(), }); } - next_rovers.append(&mut neighbors); + + // No further neighbors, so we're done. + break; } - next_rovers.retain(|idx| !visited.contains(idx)); - rovers = next_rovers; + + rover = neighbors[0]; } errors @@ -133,7 +135,7 @@ impl<'cfg> ControlFlowGraph<'cfg> { /// The resulting edges from connecting a node to the graph. enum NodeConnection { /// This represents a node that steps on to the next node. - NextStep(Vec), + NextStep(Option), /// This represents a return or implicit return node, which aborts the stepwise flow. Return(NodeIndex), } @@ -142,7 +144,7 @@ fn connect_node<'eng: 'cfg, 'cfg>( engines: &'eng Engines, node: &ty::TyAstNode, graph: &mut ControlFlowGraph<'cfg>, - leaves: &[NodeIndex], + leaf_opt: Option, ) -> Result> { match &node.content { ty::TyAstNodeContent::Expression(ty::TyExpression { @@ -154,8 +156,8 @@ fn connect_node<'eng: 'cfg, 'cfg>( .. }) => { let this_index = graph.add_node(ControlFlowGraphNode::from_node(node)); - for leaf_ix in leaves { - graph.add_edge(*leaf_ix, this_index, "".into()); + if let Some(leaf_ix) = leaf_opt { + graph.add_edge(leaf_ix, this_index, "".into()); } Ok(NodeConnection::Return(this_index)) } @@ -167,25 +169,25 @@ fn connect_node<'eng: 'cfg, 'cfg>( // since we don't really care about what the loop body contains when detecting // divergent paths let node = graph.add_node(ControlFlowGraphNode::from_node(node)); - for leaf in leaves { - graph.add_edge(*leaf, node, "while loop entry".into()); + if let Some(leaf) = leaf_opt { + graph.add_edge(leaf, node, "while loop entry".into()); } - Ok(NodeConnection::NextStep(vec![node])) + Ok(NodeConnection::NextStep(Some(node))) } ty::TyAstNodeContent::Expression(ty::TyExpression { .. }) => { let entry = graph.add_node(ControlFlowGraphNode::from_node(node)); // insert organizational dominator node // connected to all current leaves - for leaf in leaves { - graph.add_edge(*leaf, entry, "".into()); + if let Some(leaf) = leaf_opt { + graph.add_edge(leaf, entry, "".into()); } - Ok(NodeConnection::NextStep(vec![entry])) + Ok(NodeConnection::NextStep(Some(entry))) } - ty::TyAstNodeContent::SideEffect(_) => Ok(NodeConnection::NextStep(leaves.to_vec())), + ty::TyAstNodeContent::SideEffect(_) => Ok(NodeConnection::NextStep(leaf_opt)), ty::TyAstNodeContent::Declaration(decl) => Ok(NodeConnection::NextStep( - connect_declaration(engines, node, decl, graph, leaves)?, + connect_declaration(engines, node, decl, graph, leaf_opt)?, )), - ty::TyAstNodeContent::Error(_, _) => Ok(NodeConnection::NextStep(vec![])), + ty::TyAstNodeContent::Error(_, _) => Ok(NodeConnection::NextStep(None)), } } @@ -194,8 +196,8 @@ fn connect_declaration<'eng: 'cfg, 'cfg>( node: &ty::TyAstNode, decl: &ty::TyDecl, graph: &mut ControlFlowGraph<'cfg>, - leaves: &[NodeIndex], -) -> Result, Vec> { + leaf_opt: Option, +) -> Result, Vec> { let decl_engine = engines.de(); match decl { ty::TyDecl::TraitDecl(_) @@ -206,39 +208,33 @@ fn connect_declaration<'eng: 'cfg, 'cfg>( | ty::TyDecl::StorageDecl(_) | ty::TyDecl::TypeAliasDecl(_) | ty::TyDecl::TraitTypeDecl(_) - | ty::TyDecl::GenericTypeForFunctionScope(_) => Ok(leaves.to_vec()), + | ty::TyDecl::GenericTypeForFunctionScope(_) => Ok(leaf_opt), ty::TyDecl::VariableDecl(_) | ty::TyDecl::ConstantDecl(_) | ty::TyDecl::ConfigurableDecl(_) => { let entry_node = graph.add_node(ControlFlowGraphNode::from_node(node)); - for leaf in leaves { - graph.add_edge(*leaf, entry_node, "".into()); + if let Some(leaf) = leaf_opt { + graph.add_edge(leaf, entry_node, "".into()); } - Ok(vec![entry_node]) + Ok(Some(entry_node)) } ty::TyDecl::FunctionDecl(ty::FunctionDecl { decl_id, .. }) => { let fn_decl = decl_engine.get_function(decl_id); let entry_node = graph.add_node(ControlFlowGraphNode::from_node(node)); - for leaf in leaves { - graph.add_edge(*leaf, entry_node, "".into()); - } + // Do not connect the leaves to the function entry point, since control cannot flow from them into the function. connect_typed_fn_decl(engines, &fn_decl, graph, entry_node)?; - Ok(leaves.to_vec()) + Ok(leaf_opt) } ty::TyDecl::ImplSelfOrTrait(ty::ImplSelfOrTrait { decl_id, .. }) => { let impl_trait = decl_engine.get_impl_self_or_trait(decl_id); let ty::TyImplSelfOrTrait { trait_name, items, .. } = &*impl_trait; - let entry_node = graph.add_node(ControlFlowGraphNode::from_node(node)); - for leaf in leaves { - graph.add_edge(*leaf, entry_node, "".into()); - } - - connect_impl_trait(engines, trait_name, graph, items, entry_node)?; - Ok(leaves.to_vec()) + // Do not connect the leaves to the impl entry point, since control cannot flow from them into the impl. + connect_impl_trait(engines, trait_name, graph, items)?; + Ok(leaf_opt) } - ty::TyDecl::ErrorRecovery(..) => Ok(leaves.to_vec()), + ty::TyDecl::ErrorRecovery(..) => Ok(leaf_opt), } } @@ -252,7 +248,6 @@ fn connect_impl_trait<'eng: 'cfg, 'cfg>( trait_name: &CallPath, graph: &mut ControlFlowGraph<'cfg>, items: &[TyImplItem], - entry_node: NodeIndex, ) -> Result<(), Vec> { let decl_engine = engines.de(); let mut methods_and_indexes = vec![]; @@ -267,7 +262,6 @@ fn connect_impl_trait<'eng: 'cfg, 'cfg>( method_decl_ref: method_decl_ref.clone(), engines, }); - graph.add_edge(entry_node, fn_decl_entry_node, "".into()); // connect the impl declaration node to the functions themselves, as all trait functions are // public if the trait is in scope connect_typed_fn_decl(engines, &fn_decl, graph, fn_decl_entry_node)?; @@ -306,7 +300,7 @@ fn connect_typed_fn_decl<'eng: 'cfg, 'cfg>( let type_engine = engines.te(); let fn_exit_node = graph.add_node(format!("\"{}\" fn exit", fn_decl.name.as_str()).into()); let return_nodes = - depth_first_insertion_code_block(engines, &fn_decl.body, graph, &[entry_node])?; + depth_first_insertion_code_block(engines, &fn_decl.body, graph, Some(entry_node))?; for node in return_nodes { graph.add_edge(node, fn_exit_node, "return".into()); } @@ -328,16 +322,15 @@ fn depth_first_insertion_code_block<'eng: 'cfg, 'cfg>( engines: &'eng Engines, node_content: &ty::TyCodeBlock, graph: &mut ControlFlowGraph<'cfg>, - leaves: &[NodeIndex], + init_leaf_opt: Option, ) -> Result> { let mut errors = vec![]; - - let mut leaves = leaves.to_vec(); + let mut leaf_opt = init_leaf_opt; let mut return_nodes = vec![]; for node in node_content.contents.iter() { - match connect_node(engines, node, graph, &leaves) { + match connect_node(engines, node, graph, leaf_opt) { Ok(this_node) => match this_node { - NodeConnection::NextStep(nodes) => leaves = nodes, + NodeConnection::NextStep(node_opt) => leaf_opt = node_opt, NodeConnection::Return(node) => { return_nodes.push(node); } diff --git a/test/src/e2e_vm_tests/test_programs/should_fail/return_path_analysis/src/main.sw b/test/src/e2e_vm_tests/test_programs/should_fail/return_path_analysis/src/main.sw index 87ad204b7fc..4e40cf402b3 100644 --- a/test/src/e2e_vm_tests/test_programs/should_fail/return_path_analysis/src/main.sw +++ b/test/src/e2e_vm_tests/test_programs/should_fail/return_path_analysis/src/main.sw @@ -517,9 +517,51 @@ fn f() -> bool { return false; }; - // No value returned here, which is an error. + // No value returned here. The return path analysis should report an error, even though the +} + + +// Check that return path analysis is applied to local impl methods. +fn g() -> bool { + + struct X { + y: bool, + } + + impl core::ops::Eq for X { + fn eq(self, other: Self) -> bool { + if true { + return true; + } else { + return false; + }; + } + } + + let x = X { y : false }; + let y = X { y : true } ; + + x == y +} + +// Check that return path analysis is applied to local functions. +// Local functions are currently not supported, but once they are added this test should fail for +// the same reason as for the local impl in the function g(). +fn h() -> bool { + + fn tester(other: bool) -> bool { + if true { + return true; + } else { + return false; + }; + } + + tester(true) } fn main() { let _ = f(); + let _ = g(); + let _ = h(); } diff --git a/test/src/e2e_vm_tests/test_programs/should_fail/return_path_analysis/test.toml b/test/src/e2e_vm_tests/test_programs/should_fail/return_path_analysis/test.toml index 588a46a1294..1d5eb2f08b5 100644 --- a/test/src/e2e_vm_tests/test_programs/should_fail/return_path_analysis/test.toml +++ b/test/src/e2e_vm_tests/test_programs/should_fail/return_path_analysis/test.toml @@ -1,6 +1,9 @@ category = "fail" +# check: $()Declaring nested functions is currently not implemented. +# check: $()Could not find symbol "tester" in this scope. # check: $()This path must return a value of type "bool" from function "f", but it does not. -# check: $()Aborting due to 1 error. +# check: $()This path must return a value of type "bool" from function "eq", but it does not. +# check: $()Aborting due to 4 errors.