
Commit cacfaf8

Cranelift: Split out dominator tree's depth-first traversal into a reusable iterator (#8640)

We intend to use this when computing liveness of GC references in
`cranelift-frontend` to manually construct safepoints and ultimately remove
`r{32,64}` reference types from CLIF, `cranelift-codegen`, and `regalloc2`.

Co-authored-by: Trevor Elliott <[email protected]>
fitzgen and elliottt authored May 20, 2024
1 parent 0ea10ac commit cacfaf8
Showing 6 changed files with 227 additions and 84 deletions.
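For context, here is a minimal sketch, not part of this diff, of how a caller might drive the new `traversals::Dfs` API added below. The helper name `print_orders` and the pre-built `func` are hypothetical; the calls used (`Dfs::new`, `iter`, `pre_order_iter`, `post_order_iter`) are the ones this commit introduces.

use cranelift_codegen::ir::{Block, Function};
use cranelift_codegen::traversals::{Dfs, Event};

fn print_orders(func: &Function) {
    // One `Dfs` value can be reused across passes and functions; it keeps and
    // reuses its internal stack and seen-set allocations.
    let mut dfs = Dfs::new();

    // Post-order over the reachable blocks; this is what
    // `DominatorTree::compute_postorder` now consumes.
    let post: Vec<Block> = dfs.post_order_iter(func).collect();

    // Reversing the post-order yields the reverse post-order (RPO) that
    // forward dataflow analyses typically iterate in.
    let rpo: Vec<Block> = post.iter().rev().copied().collect();

    // Pre-order over the same function, reusing the traversal state.
    let pre: Vec<Block> = dfs.pre_order_iter(func).collect();

    // The low-level event stream interleaves block entry and exit.
    for (event, block) in dfs.iter(func) {
        match event {
            Event::Enter => println!("enter {block}"),
            Event::Exit => println!("exit {block}"),
        }
    }

    println!("pre-order:  {pre:?}");
    println!("post-order: {post:?}");
    println!("rpo:        {rpo:?}");
}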
1 change: 1 addition & 0 deletions Cargo.lock

Generated file; diff not rendered.

1 change: 1 addition & 0 deletions cranelift/codegen/Cargo.toml
@@ -43,6 +43,7 @@ rustc-hash = { workspace = true }
[dev-dependencies]
criterion = { workspace = true }
similar = "2.1.0"
env_logger = { workspace = true }

[build-dependencies]
cranelift-codegen-meta = { path = "meta", version = "0.109.0" }
88 changes: 6 additions & 82 deletions cranelift/codegen/src/dominator_tree.rs
@@ -5,6 +5,7 @@ use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
use crate::ir::{Block, Function, Inst, Layout, ProgramPoint};
use crate::packed_option::PackedOption;
use crate::timing;
use crate::traversals::Dfs;
use alloc::vec::Vec;
use core::cmp;
use core::cmp::Ordering;
@@ -14,9 +15,6 @@ use core::mem;
/// room for modifications of the dominator tree.
const STRIDE: u32 = 4;

/// Special RPO numbers used during `compute_postorder`.
const SEEN: u32 = 1;

/// Dominator tree node. We keep one of these per block.
#[derive(Clone, Default)]
struct DomNode {
@@ -34,21 +32,15 @@ struct DomNode {
idom: PackedOption<Inst>,
}

/// DFT stack state marker for computing the cfg postorder.
enum Visit {
First,
Last,
}

/// The dominator tree for a single function.
pub struct DominatorTree {
nodes: SecondaryMap<Block, DomNode>,

/// CFG post-order of all reachable blocks.
postorder: Vec<Block>,

/// Scratch memory used by `compute_postorder()`.
stack: Vec<(Visit, Block)>,
/// Scratch traversal state used by `compute_postorder()`.
dfs: Dfs,

valid: bool,
}
@@ -225,7 +217,7 @@ impl DominatorTree {
Self {
nodes: SecondaryMap::new(),
postorder: Vec::new(),
stack: Vec::new(),
dfs: Dfs::new(),
valid: false,
}
}
@@ -236,7 +228,7 @@ impl DominatorTree {
let mut domtree = Self {
nodes: SecondaryMap::with_capacity(block_capacity),
postorder: Vec::with_capacity(block_capacity),
stack: Vec::new(),
dfs: Dfs::new(),
valid: false,
};
domtree.compute(func, cfg);
@@ -257,7 +249,6 @@ impl DominatorTree {
pub fn clear(&mut self) {
self.nodes.clear();
self.postorder.clear();
debug_assert!(self.stack.is_empty());
self.valid = false;
}

@@ -276,73 +267,7 @@ impl DominatorTree {
fn compute_postorder(&mut self, func: &Function) {
self.clear();
self.nodes.resize(func.dfg.num_blocks());

// This algorithm is a depth first traversal (DFT) of the control flow graph, computing a
// post-order of the blocks that are reachable from the entry block. A DFT post-order is not
// unique. The specific order we get is controlled by the order each node's children are
// visited.
//
// We view the CFG as a graph where each `BlockCall` value of a terminating branch
// instruction is an edge. A consequence of this is that we visit successor nodes in the
// reverse order specified by the branch instruction that terminates the basic block.
// (Reversed because we are using a stack to control traversal, and push the successors in
// the order the branch instruction specifies -- there's no good reason for this particular
// order.)
//
// During this algorithm only, use `rpo_number` to hold the following state:
//
// 0: block has not yet had its first visit
// SEEN: block has been visited at least once, implying that all of its successors are on
// the stack

match func.layout.entry_block() {
Some(block) => {
self.stack.push((Visit::First, block));
}
None => return,
}

while let Some((visit, block)) = self.stack.pop() {
match visit {
Visit::First => {
if self.nodes[block].rpo_number == 0 {
// This is the first time we pop the block, so we need to scan its
// successors and then revisit it.
self.nodes[block].rpo_number = SEEN;
self.stack.push((Visit::Last, block));
if let Some(inst) = func.stencil.layout.last_inst(block) {
// Heuristic: chase the children in reverse. This puts the first
// successor block first in the postorder, all other things being
// equal, which tends to prioritize loop backedges over out-edges,
// putting the edge-block closer to the loop body and minimizing
// live-ranges in linear instruction space. This heuristic doesn't have
// any effect on the computation of dominators, and is purely for other
// consumers of the postorder we cache here.
for block in func.stencil.dfg.insts[inst]
.branch_destination(&func.stencil.dfg.jump_tables)
.iter()
.rev()
{
let succ = block.block(&func.stencil.dfg.value_lists);

// This is purely an optimization to avoid additional iterations of
// the loop, and is not required; it's merely inlining the check
// from the outer conditional of this case to avoid the extra loop
// iteration.
if self.nodes[succ].rpo_number == 0 {
self.stack.push((Visit::First, succ))
}
}
}
}
}

Visit::Last => {
// We've finished all this node's successors.
self.postorder.push(block);
}
}
}
self.postorder.extend(self.dfs.post_order_iter(func));
}

/// Build a dominator tree from a control flow graph using Keith D. Cooper's
@@ -467,7 +392,6 @@ impl DominatorTreePreorder {
/// Recompute this data structure to match `domtree`.
pub fn compute(&mut self, domtree: &DominatorTree, layout: &Layout) {
self.nodes.clear();
debug_assert_eq!(self.stack.len(), 0);

// Step 1: Populate the child and sibling links.
//
1 change: 1 addition & 0 deletions cranelift/codegen/src/lib.rs
@@ -49,6 +49,7 @@ pub mod loop_analysis;
pub mod print_errors;
pub mod settings;
pub mod timing;
pub mod traversals;
pub mod verifier;
pub mod write;

216 changes: 216 additions & 0 deletions cranelift/codegen/src/traversals.rs
@@ -0,0 +1,216 @@
//! Traversals over the IR.

use crate::ir;
use alloc::vec::Vec;
use core::fmt::Debug;
use core::hash::Hash;
use cranelift_entity::EntitySet;

/// A low-level DFS traversal event: either entering or exiting the traversal of
/// a block.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Event {
/// Entering traversal of a block.
///
/// Processing a block upon this event corresponds to a pre-order,
/// depth-first traversal.
Enter,

/// Exiting traversal of a block.
///
/// Processing a block upon this event corresponds to a post-order,
/// depth-first traversal.
Exit,
}

/// A depth-first traversal.
///
/// This is a fairly low-level traversal type, and is generally intended to be
/// used as a building block for making specific pre-order or post-order
/// traversals for whatever problem is at hand.
///
/// This type may be reused multiple times across different passes or functions
/// and will internally reuse any heap allocations it has already made.
///
/// Traversal is not recursive.
#[derive(Debug, Default, Clone)]
pub struct Dfs {
stack: Vec<(Event, ir::Block)>,
seen: EntitySet<ir::Block>,
}

impl Dfs {
/// Construct a new depth-first traversal.
pub fn new() -> Self {
Self::default()
}

/// Perform a depth-first traversal over the given function.
///
/// Yields pairs of `(Event, ir::Block)`.
///
/// This iterator can be used to perform either pre- or post-order
/// traversals, or a combination of the two.
pub fn iter<'a>(&'a mut self, func: &'a ir::Function) -> DfsIter<'a> {
self.seen.clear();
self.stack.clear();
if let Some(e) = func.layout.entry_block() {
self.stack.push((Event::Enter, e));
}
DfsIter { dfs: self, func }
}

/// Perform a pre-order traversal over the given function.
///
/// Yields `ir::Block` items.
pub fn pre_order_iter<'a>(&'a mut self, func: &'a ir::Function) -> DfsPreOrderIter<'a> {
DfsPreOrderIter(self.iter(func))
}

/// Perform a post-order traversal over the given function.
///
/// Yields `ir::Block` items.
pub fn post_order_iter<'a>(&'a mut self, func: &'a ir::Function) -> DfsPostOrderIter<'a> {
DfsPostOrderIter(self.iter(func))
}
}

/// An iterator that yields pairs of `(Event, ir::Block)` items as it performs a
/// depth-first traversal over its associated function.
pub struct DfsIter<'a> {
dfs: &'a mut Dfs,
func: &'a ir::Function,
}

impl Iterator for DfsIter<'_> {
type Item = (Event, ir::Block);

fn next(&mut self) -> Option<(Event, ir::Block)> {
let (event, block) = self.dfs.stack.pop()?;

if event == Event::Enter && self.dfs.seen.insert(block) {
self.dfs.stack.push((Event::Exit, block));
if let Some(inst) = self.func.layout.last_inst(block) {
self.dfs.stack.extend(
self.func.dfg.insts[inst]
.branch_destination(&self.func.dfg.jump_tables)
.iter()
// Heuristic: chase the children in reverse. This puts
// the first successor block first in the postorder, all
// other things being equal, which tends to prioritize
// loop backedges over out-edges, putting the edge-block
// closer to the loop body and minimizing live-ranges in
// linear instruction space. This heuristic doesn't have
// any effect on the computation of dominators, and is
// purely for other consumers of the postorder we cache
// here.
.rev()
.map(|block| block.block(&self.func.dfg.value_lists))
// This is purely an optimization to avoid additional
// iterations of the loop, and is not required; it's
// merely inlining the check from the outer conditional
// of this case to avoid the extra loop iteration. This
// also avoids potential excess stack growth.
.filter(|block| !self.dfs.seen.contains(*block))
.map(|block| (Event::Enter, block)),
);
}
}

Some((event, block))
}
}

/// An iterator that yields `ir::Block` items during a depth-first, pre-order
/// traversal over its associated function.
pub struct DfsPreOrderIter<'a>(DfsIter<'a>);

impl Iterator for DfsPreOrderIter<'_> {
type Item = ir::Block;

fn next(&mut self) -> Option<Self::Item> {
loop {
match self.0.next()? {
(Event::Enter, b) => return Some(b),
(Event::Exit, _) => continue,
}
}
}
}

/// An iterator that yields `ir::Block` items during a depth-first, post-order
/// traversal over its associated function.
pub struct DfsPostOrderIter<'a>(DfsIter<'a>);

impl Iterator for DfsPostOrderIter<'_> {
type Item = ir::Block;

fn next(&mut self) -> Option<Self::Item> {
loop {
match self.0.next()? {
(Event::Exit, b) => return Some(b),
(Event::Enter, _) => continue,
}
}
}
}

#[cfg(test)]
mod tests {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{types::I32, Function, InstBuilder, TrapCode};

#[test]
fn test_dfs_traversal() {
let _ = env_logger::try_init();

let mut func = Function::new();

let block0 = func.dfg.make_block();
let v0 = func.dfg.append_block_param(block0, I32);
let block1 = func.dfg.make_block();
let block2 = func.dfg.make_block();
let block3 = func.dfg.make_block();

let mut cur = FuncCursor::new(&mut func);

// block0(v0):
// brif v0, block2, block3
cur.insert_block(block0);
cur.ins().brif(v0, block2, &[], block3, &[]);

// block3:
// trap user0
cur.insert_block(block3);
cur.ins().trap(TrapCode::User(0));

// block1:
// v1 = iconst.i32 1
// v2 = iadd v0, v1
// jump block0(v2)
cur.insert_block(block1);
let v1 = cur.ins().iconst(I32, 1);
let v2 = cur.ins().iadd(v0, v1);
cur.ins().jump(block0, &[v2]);

// block2:
// return v0
cur.insert_block(block2);
cur.ins().return_(&[v0]);

let mut dfs = Dfs::new();

assert_eq!(
dfs.iter(&func).collect::<Vec<_>>(),
vec![
(Event::Enter, block0),
(Event::Enter, block2),
(Event::Exit, block2),
(Event::Enter, block3),
(Event::Exit, block3),
(Event::Exit, block0)
],
);
}
}
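As a further illustration, and again not part of this commit, here is a hedged sketch of the kind of Enter/Exit bookkeeping the low-level `Dfs::iter` event stream enables; `dfs_depths` is a hypothetical helper that records each reachable block's depth in the depth-first spanning tree rooted at the entry block.

use std::collections::HashMap;

use cranelift_codegen::ir::{Block, Function};
use cranelift_codegen::traversals::{Dfs, Event};

/// Hypothetical helper: depth of each reachable block in the DFS spanning tree.
fn dfs_depths(func: &Function) -> HashMap<Block, usize> {
    let mut dfs = Dfs::new();
    let mut depths = HashMap::new();
    // Number of blocks currently entered but not yet exited.
    let mut open = 0usize;
    for (event, block) in dfs.iter(func) {
        match event {
            Event::Enter => {
                // Only the first `Enter` of a block gets a matching `Exit`
                // (the iterator pushes `Exit` only when the block is newly
                // seen), so a repeated `Enter` for an already-recorded block
                // is ignored here.
                if !depths.contains_key(&block) {
                    depths.insert(block, open);
                    open += 1;
                }
            }
            Event::Exit => open -= 1,
        }
    }
    depths
}

On the CFG built in the test above, this sketch would report depth 0 for block0 and depth 1 for block2 and block3, while the unreachable block1 never appears.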